// AOMedia AV1 Codec: svc_encoder_rtc
/*
 * Copyright (c) 2019, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

// This is an example demonstrating how to implement a multi-layer AOM
// encoding scheme for RTC video applications.
14
15#include <assert.h>
16#include <inttypes.h>
17#include <limits.h>
18#include <math.h>
19#include <stdio.h>
20#include <stdlib.h>
21#include <string.h>
22
23#include <memory>
24
25#include "config/aom_config.h"
26
27#if CONFIG_AV1_DECODER
28#include "aom/aom_decoder.h"
29#endif
30#include "aom/aom_encoder.h"
31#include "aom/aom_image.h"
32#include "aom/aom_integer.h"
33#include "aom/aomcx.h"
34#include "aom_dsp/bitwriter_buffer.h"
35#include "aom_ports/aom_timer.h"
36#include "av1/ratectrl_rtc.h"
37#include "common/args.h"
38#include "common/tools_common.h"
39#include "common/video_writer.h"
40#include "examples/encoder_util.h"
41#include "examples/multilayer_metadata.h"
42
// Size in bytes of the AppInput options buffer.
#define OPTION_BUFFER_SIZE 1024
// Maximum number of input contexts (one per spatial layer) held by AppInput.
#define MAX_NUM_SPATIAL_LAYERS 4
45
46typedef struct {
47 const char *output_filename;
48 char options[OPTION_BUFFER_SIZE];
49 struct AvxInputContext input_ctx[MAX_NUM_SPATIAL_LAYERS];
50 int speed;
51 int aq_mode;
52 int layering_mode;
53 int output_obu;
54 int decode;
55 int tune_content;
56 int show_psnr;
57 bool use_external_rc;
58 bool scale_factors_explicitly_set;
59 const char *multilayer_metadata_file;
60} AppInput;
61
// Kinds of per-layer option lists. The values index option_min_values and
// option_max_values, so the order here must match those tables.
typedef enum {
  QUANTIZER = 0,
  BITRATE,
  SCALE_FACTOR,
  AUTO_ALT_REF,
  ALL_OPTION_TYPES  // Number of option types; not a real option.
} LAYER_OPTION_TYPE;
69
70static const arg_def_t outputfile =
71 ARG_DEF("o", "output", 1, "Output filename");
72static const arg_def_t frames_arg =
73 ARG_DEF("f", "frames", 1, "Number of frames to encode");
74static const arg_def_t threads_arg =
75 ARG_DEF("th", "threads", 1, "Number of threads to use");
76static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "Source width");
77static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "Source height");
78static const arg_def_t timebase_arg =
79 ARG_DEF("t", "timebase", 1, "Timebase (num/den)");
80static const arg_def_t bitrate_arg = ARG_DEF(
81 "b", "target-bitrate", 1, "Encoding bitrate, in kilobits per second");
82static const arg_def_t spatial_layers_arg =
83 ARG_DEF("sl", "spatial-layers", 1, "Number of spatial SVC layers");
84static const arg_def_t temporal_layers_arg =
85 ARG_DEF("tl", "temporal-layers", 1, "Number of temporal SVC layers");
86static const arg_def_t layering_mode_arg =
87 ARG_DEF("lm", "layering-mode", 1, "Temporal layering scheme.");
88static const arg_def_t kf_dist_arg =
89 ARG_DEF("k", "kf-dist", 1, "Number of frames between keyframes");
90static const arg_def_t scale_factors_arg =
91 ARG_DEF("r", "scale-factors", 1, "Scale factors (lowest to highest layer)");
92static const arg_def_t min_q_arg =
93 ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
94static const arg_def_t max_q_arg =
95 ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
96static const arg_def_t speed_arg =
97 ARG_DEF("sp", "speed", 1, "Speed configuration");
98static const arg_def_t aqmode_arg =
99 ARG_DEF("aq", "aqmode", 1, "AQ mode off/on");
100static const arg_def_t bitrates_arg =
101 ARG_DEF("bl", "bitrates", 1,
102 "Bitrates[spatial_layer * num_temporal_layer + temporal_layer]");
103static const arg_def_t dropframe_thresh_arg =
104 ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)");
105static const arg_def_t error_resilient_arg =
106 ARG_DEF(NULL, "error-resilient", 1, "Error resilient flag");
107static const arg_def_t output_obu_arg =
108 ARG_DEF(NULL, "output-obu", 1,
109 "Write OBUs when set to 1. Otherwise write IVF files.");
110static const arg_def_t test_decode_arg =
111 ARG_DEF(NULL, "test-decode", 1,
112 "Attempt to test decoding the output when set to 1. Default is 1.");
113static const arg_def_t psnr_arg =
114 ARG_DEF(NULL, "psnr", -1, "Show PSNR in status line.");
115static const arg_def_t ext_rc_arg =
116 ARG_DEF(NULL, "use-ext-rc", 0, "Use external rate control.");
117static const struct arg_enum_list tune_content_enum[] = {
118 { "default", AOM_CONTENT_DEFAULT },
119 { "screen", AOM_CONTENT_SCREEN },
120 { "film", AOM_CONTENT_FILM },
121 { NULL, 0 }
122};
123static const arg_def_t tune_content_arg = ARG_DEF_ENUM(
124 NULL, "tune-content", 1, "Tune content type", tune_content_enum);
125#if CONFIG_CWG_E050
126static const arg_def_t multilayer_metadata_file_arg =
127 ARG_DEF("ml", "multilayer_metadata_file", 1,
128 "Experimental: path to multilayer metadata file");
129#endif
130
131#if CONFIG_AV1_HIGHBITDEPTH
132static const struct arg_enum_list bitdepth_enum[] = { { "8", AOM_BITS_8 },
133 { "10", AOM_BITS_10 },
134 { NULL, 0 } };
135
136static const arg_def_t bitdepth_arg = ARG_DEF_ENUM(
137 "d", "bit-depth", 1, "Bit depth for codec 8 or 10. ", bitdepth_enum);
138#endif // CONFIG_AV1_HIGHBITDEPTH
139
140static const arg_def_t *svc_args[] = {
141 &frames_arg,
142 &outputfile,
143 &width_arg,
144 &height_arg,
145 &timebase_arg,
146 &bitrate_arg,
147 &spatial_layers_arg,
148 &kf_dist_arg,
149 &scale_factors_arg,
150 &min_q_arg,
151 &max_q_arg,
152 &temporal_layers_arg,
153 &layering_mode_arg,
154 &threads_arg,
155 &aqmode_arg,
156#if CONFIG_AV1_HIGHBITDEPTH
157 &bitdepth_arg,
158#endif
159 &speed_arg,
160 &bitrates_arg,
161 &dropframe_thresh_arg,
162 &error_resilient_arg,
163 &output_obu_arg,
164 &test_decode_arg,
165 &tune_content_arg,
166 &psnr_arg,
167#if CONFIG_CWG_E050
168 &multilayer_metadata_file_arg,
169#endif
170 NULL,
171};
172
// Zero-fills the object Dest (pass the object itself, not a pointer to it).
#define zero(Dest) memset(&(Dest), 0, sizeof(Dest))

// Program name printed in the usage line by usage_exit().
static const char *exec_name;
176
177void usage_exit(void) {
178 fprintf(stderr,
179 "Usage: %s <options> input_filename [input_filename ...] -o "
180 "output_filename\n",
181 exec_name);
182 fprintf(stderr, "Options:\n");
183 arg_show_usage(stderr, svc_args);
184 fprintf(
185 stderr,
186 "Input files must be y4m or yuv.\n"
187 "If multiple input files are specified, they correspond to spatial "
188 "layers, and there should be as many as there are spatial layers.\n"
189 "All input files must have the same width, height, frame rate and number "
190 "of frames.\n"
191 "If only one file is specified, it is used for all spatial layers.\n");
192 exit(EXIT_FAILURE);
193}
194
// Returns 1 when the first four bytes of `detect` are the Y4M signature
// "YUV4", 0 otherwise. `detect` must point to at least four readable bytes.
static int file_is_y4m(const char detect[4]) {
  return memcmp(detect, "YUV4", 4) == 0;
}
198
// Returns 1 when the first four bytes of `detect` are the IVF signature
// "DKIF", 0 otherwise. `detect` must point to at least four readable bytes.
static int fourcc_is_ivf(const char detect[4]) {
  return memcmp(detect, "DKIF", 4) == 0;
}
205
206static const int option_max_values[ALL_OPTION_TYPES] = { 63, INT_MAX, INT_MAX,
207 1 };
208
209static const int option_min_values[ALL_OPTION_TYPES] = { 0, 0, 1, 0 };
210
211static void open_input_file(struct AvxInputContext *input,
213 /* Parse certain options from the input file, if possible */
214 input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb")
215 : set_binary_mode(stdin);
216
217 if (!input->file) fatal("Failed to open input file");
218
219 if (!fseeko(input->file, 0, SEEK_END)) {
220 /* Input file is seekable. Figure out how long it is, so we can get
221 * progress info.
222 */
223 input->length = ftello(input->file);
224 rewind(input->file);
225 }
226
227 /* Default to 1:1 pixel aspect ratio. */
228 input->pixel_aspect_ratio.numerator = 1;
229 input->pixel_aspect_ratio.denominator = 1;
230
231 /* For RAW input sources, these bytes will applied on the first frame
232 * in read_frame().
233 */
234 input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
235 input->detect.position = 0;
236
237 if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) {
238 if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, csp,
239 input->only_i420) >= 0) {
240 input->file_type = FILE_TYPE_Y4M;
241 input->width = input->y4m.pic_w;
242 input->height = input->y4m.pic_h;
243 input->pixel_aspect_ratio.numerator = input->y4m.par_n;
244 input->pixel_aspect_ratio.denominator = input->y4m.par_d;
245 input->framerate.numerator = input->y4m.fps_n;
246 input->framerate.denominator = input->y4m.fps_d;
247 input->fmt = input->y4m.aom_fmt;
248 input->bit_depth = static_cast<aom_bit_depth_t>(input->y4m.bit_depth);
249 } else {
250 fatal("Unsupported Y4M stream.");
251 }
252 } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
253 fatal("IVF is not supported as input.");
254 } else {
255 input->file_type = FILE_TYPE_RAW;
256 }
257}
258
259static aom_codec_err_t extract_option(LAYER_OPTION_TYPE type, char *input,
260 int *value0, int *value1) {
261 if (type == SCALE_FACTOR) {
262 *value0 = (int)strtol(input, &input, 10);
263 if (*input++ != '/') return AOM_CODEC_INVALID_PARAM;
264 *value1 = (int)strtol(input, &input, 10);
265
266 if (*value0 < option_min_values[SCALE_FACTOR] ||
267 *value1 < option_min_values[SCALE_FACTOR] ||
268 *value0 > option_max_values[SCALE_FACTOR] ||
269 *value1 > option_max_values[SCALE_FACTOR] ||
270 *value0 > *value1) // num shouldn't be greater than den
272 } else {
273 *value0 = atoi(input);
274 if (*value0 < option_min_values[type] || *value0 > option_max_values[type])
276 }
277 return AOM_CODEC_OK;
278}
279
280static aom_codec_err_t parse_layer_options_from_string(
281 aom_svc_params_t *svc_params, LAYER_OPTION_TYPE type, const char *input,
282 int *option0, int *option1) {
284 char *input_string;
285 char *token;
286 const char *delim = ",";
287 int num_layers = svc_params->number_spatial_layers;
288 int i = 0;
289
290 if (type == BITRATE)
291 num_layers =
292 svc_params->number_spatial_layers * svc_params->number_temporal_layers;
293
294 if (input == NULL || option0 == NULL ||
295 (option1 == NULL && type == SCALE_FACTOR))
297
298 const size_t input_length = strlen(input);
299 input_string = reinterpret_cast<char *>(malloc(input_length + 1));
300 if (input_string == NULL) return AOM_CODEC_MEM_ERROR;
301 memcpy(input_string, input, input_length + 1);
302 token = strtok(input_string, delim); // NOLINT
303 for (i = 0; i < num_layers; ++i) {
304 if (token != NULL) {
305 res = extract_option(type, token, option0 + i, option1 + i);
306 if (res != AOM_CODEC_OK) break;
307 token = strtok(NULL, delim); // NOLINT
308 } else {
310 break;
311 }
312 }
313 free(input_string);
314 return res;
315}
316
317static void parse_command_line(int argc, const char **argv_,
318 AppInput *app_input,
319 aom_svc_params_t *svc_params,
320 aom_codec_enc_cfg_t *enc_cfg) {
321 struct arg arg;
322 char **argv = NULL;
323 char **argi = NULL;
324 char **argj = NULL;
325 char string_options[1024] = { 0 };
326
327 // Default settings
328 svc_params->number_spatial_layers = 1;
329 svc_params->number_temporal_layers = 1;
330 app_input->layering_mode = 0;
331 app_input->output_obu = 0;
332 app_input->decode = 1;
333 enc_cfg->g_threads = 1;
334 enc_cfg->rc_end_usage = AOM_CBR;
335
336 // process command line options
337 argv = argv_dup(argc - 1, argv_ + 1);
338 if (!argv) {
339 fprintf(stderr, "Error allocating argument list\n");
340 exit(EXIT_FAILURE);
341 }
342 for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
343 arg.argv_step = 1;
344
345 if (arg_match(&arg, &outputfile, argi)) {
346 app_input->output_filename = arg.val;
347 } else if (arg_match(&arg, &width_arg, argi)) {
348 enc_cfg->g_w = arg_parse_uint(&arg);
349 } else if (arg_match(&arg, &height_arg, argi)) {
350 enc_cfg->g_h = arg_parse_uint(&arg);
351 } else if (arg_match(&arg, &timebase_arg, argi)) {
352 enc_cfg->g_timebase = arg_parse_rational(&arg);
353 } else if (arg_match(&arg, &bitrate_arg, argi)) {
354 enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
355 } else if (arg_match(&arg, &spatial_layers_arg, argi)) {
356 svc_params->number_spatial_layers = arg_parse_uint(&arg);
357 } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
358 svc_params->number_temporal_layers = arg_parse_uint(&arg);
359 } else if (arg_match(&arg, &speed_arg, argi)) {
360 app_input->speed = arg_parse_uint(&arg);
361 if (app_input->speed > 11) {
362 aom_tools_warn("Mapping speed %d to speed 11.\n", app_input->speed);
363 }
364 } else if (arg_match(&arg, &aqmode_arg, argi)) {
365 app_input->aq_mode = arg_parse_uint(&arg);
366 } else if (arg_match(&arg, &threads_arg, argi)) {
367 enc_cfg->g_threads = arg_parse_uint(&arg);
368 } else if (arg_match(&arg, &layering_mode_arg, argi)) {
369 app_input->layering_mode = arg_parse_int(&arg);
370 } else if (arg_match(&arg, &kf_dist_arg, argi)) {
371 enc_cfg->kf_min_dist = arg_parse_uint(&arg);
372 enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
373 } else if (arg_match(&arg, &scale_factors_arg, argi)) {
374 aom_codec_err_t res = parse_layer_options_from_string(
375 svc_params, SCALE_FACTOR, arg.val, svc_params->scaling_factor_num,
376 svc_params->scaling_factor_den);
377 app_input->scale_factors_explicitly_set = true;
378 if (res != AOM_CODEC_OK) {
379 die("Failed to parse scale factors: %s\n",
381 }
382 } else if (arg_match(&arg, &min_q_arg, argi)) {
383 enc_cfg->rc_min_quantizer = arg_parse_uint(&arg);
384 } else if (arg_match(&arg, &max_q_arg, argi)) {
385 enc_cfg->rc_max_quantizer = arg_parse_uint(&arg);
386#if CONFIG_AV1_HIGHBITDEPTH
387 } else if (arg_match(&arg, &bitdepth_arg, argi)) {
388 enc_cfg->g_bit_depth =
389 static_cast<aom_bit_depth_t>(arg_parse_enum_or_int(&arg));
390 switch (enc_cfg->g_bit_depth) {
391 case AOM_BITS_8:
392 enc_cfg->g_input_bit_depth = 8;
393 enc_cfg->g_profile = 0;
394 break;
395 case AOM_BITS_10:
396 enc_cfg->g_input_bit_depth = 10;
397 enc_cfg->g_profile = 0;
398 break;
399 default:
400 die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth);
401 }
402#endif // CONFIG_VP9_HIGHBITDEPTH
403 } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) {
404 enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg);
405 } else if (arg_match(&arg, &error_resilient_arg, argi)) {
406 enc_cfg->g_error_resilient = arg_parse_uint(&arg);
407 if (enc_cfg->g_error_resilient != 0 && enc_cfg->g_error_resilient != 1)
408 die("Invalid value for error resilient (0, 1): %d.",
409 enc_cfg->g_error_resilient);
410 } else if (arg_match(&arg, &output_obu_arg, argi)) {
411 app_input->output_obu = arg_parse_uint(&arg);
412 if (app_input->output_obu != 0 && app_input->output_obu != 1)
413 die("Invalid value for obu output flag (0, 1): %d.",
414 app_input->output_obu);
415 } else if (arg_match(&arg, &test_decode_arg, argi)) {
416 app_input->decode = arg_parse_uint(&arg);
417 if (app_input->decode != 0 && app_input->decode != 1)
418 die("Invalid value for test decode flag (0, 1): %d.",
419 app_input->decode);
420 } else if (arg_match(&arg, &tune_content_arg, argi)) {
421 app_input->tune_content = arg_parse_enum_or_int(&arg);
422 printf("tune content %d\n", app_input->tune_content);
423 } else if (arg_match(&arg, &psnr_arg, argi)) {
424 app_input->show_psnr = 1;
425 } else if (arg_match(&arg, &ext_rc_arg, argi)) {
426 app_input->use_external_rc = true;
427#if CONFIG_CWG_E050
428 } else if (arg_match(&arg, &multilayer_metadata_file_arg, argi)) {
429 app_input->multilayer_metadata_file = arg.val;
430#endif
431 } else {
432 ++argj;
433 }
434 }
435
436 // Total bitrate needs to be parsed after the number of layers.
437 for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
438 arg.argv_step = 1;
439 if (arg_match(&arg, &bitrates_arg, argi)) {
440 aom_codec_err_t res = parse_layer_options_from_string(
441 svc_params, BITRATE, arg.val, svc_params->layer_target_bitrate, NULL);
442 if (res != AOM_CODEC_OK) {
443 die("Failed to parse bitrates: %s\n", aom_codec_err_to_string(res));
444 }
445 } else {
446 ++argj;
447 }
448 }
449
450 // There will be a space in front of the string options
451 if (strlen(string_options) > 0)
452 strncpy(app_input->options, string_options, OPTION_BUFFER_SIZE);
453
454 // Check for unrecognized options
455 for (argi = argv; *argi; ++argi)
456 if (argi[0][0] == '-' && strlen(argi[0]) > 1)
457 die("Error: Unrecognized option %s\n", *argi);
458
459 if (argv[0] == NULL) {
460 usage_exit();
461 }
462
463 int input_count = 0;
464 while (argv[input_count] != NULL && input_count < MAX_NUM_SPATIAL_LAYERS) {
465 app_input->input_ctx[input_count].filename = argv[input_count];
466 ++input_count;
467 }
468 if (input_count > 1 && input_count != svc_params->number_spatial_layers) {
469 die("Error: Number of input files does not match number of spatial layers");
470 }
471 if (argv[input_count] != NULL) {
472 die("Error: Too many input files specified, there should be at most %d",
473 MAX_NUM_SPATIAL_LAYERS);
474 }
475
476 free(argv);
477
478 for (int i = 0; i < input_count; ++i) {
479 open_input_file(&app_input->input_ctx[i], AOM_CSP_UNKNOWN);
480 if (app_input->input_ctx[i].file_type == FILE_TYPE_Y4M) {
481 if (enc_cfg->g_w == 0 || enc_cfg->g_h == 0) {
482 // Override these settings with the info from Y4M file.
483 enc_cfg->g_w = app_input->input_ctx[i].width;
484 enc_cfg->g_h = app_input->input_ctx[i].height;
485 // g_timebase is the reciprocal of frame rate.
486 enc_cfg->g_timebase.num = app_input->input_ctx[i].framerate.denominator;
487 enc_cfg->g_timebase.den = app_input->input_ctx[i].framerate.numerator;
488 } else if (enc_cfg->g_w != app_input->input_ctx[i].width ||
489 enc_cfg->g_h != app_input->input_ctx[i].height ||
490 enc_cfg->g_timebase.num !=
491 app_input->input_ctx[i].framerate.denominator ||
492 enc_cfg->g_timebase.den !=
493 app_input->input_ctx[i].framerate.numerator) {
494 die("Error: Input file dimensions and/or frame rate mismatch");
495 }
496 }
497 }
498 if (enc_cfg->g_w == 0 || enc_cfg->g_h == 0) {
499 die("Error: Input file dimensions not set, use -w and -h");
500 }
501
502 if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
503 enc_cfg->g_h % 2)
504 die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
505
506 printf(
507 "Codec %s\n"
508 "layers: %d\n"
509 "width %u, height: %u\n"
510 "num: %d, den: %d, bitrate: %u\n"
511 "gop size: %u\n",
513 svc_params->number_spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
514 enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
515 enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
516}
517
// Layer counts for each of the 12 table entries.
// NOTE(review): presumably indexed by the --layering-mode value (0..11);
// confirm against the code that reads these tables.
static const int mode_to_num_temporal_layers[12] = {
  1, 2, 3, 3, 2, 1, 1, 3, 3, 3, 3, 3,
};
static const int mode_to_num_spatial_layers[12] = {
  1, 1, 1, 1, 1, 2, 3, 2, 3, 3, 3, 3,
};
524
525// For rate control encoding stats.
526struct RateControlMetrics {
527 // Number of input frames per layer.
528 int layer_input_frames[AOM_MAX_TS_LAYERS];
529 // Number of encoded non-key frames per layer.
530 int layer_enc_frames[AOM_MAX_TS_LAYERS];
531 // Framerate per layer layer (cumulative).
532 double layer_framerate[AOM_MAX_TS_LAYERS];
533 // Target average frame size per layer (per-frame-bandwidth per layer).
534 double layer_pfb[AOM_MAX_LAYERS];
535 // Actual average frame size per layer.
536 double layer_avg_frame_size[AOM_MAX_LAYERS];
537 // Average rate mismatch per layer (|target - actual| / target).
538 double layer_avg_rate_mismatch[AOM_MAX_LAYERS];
539 // Actual encoding bitrate per layer (cumulative across temporal layers).
540 double layer_encoding_bitrate[AOM_MAX_LAYERS];
541 // Average of the short-time encoder actual bitrate.
542 // TODO(marpan): Should we add these short-time stats for each layer?
543 double avg_st_encoding_bitrate;
544 // Variance of the short-time encoder actual bitrate.
545 double variance_st_encoding_bitrate;
546 // Window (number of frames) for computing short-timee encoding bitrate.
547 int window_size;
548 // Number of window measurements.
549 int window_count;
550 int layer_target_bitrate[AOM_MAX_LAYERS];
551};
552
// Number of entries in the ref_frame_config refresh[] array.
static const int REF_FRAMES = 8;

// Number of entries in the ref_frame_config ref_idx[] and reference[] arrays.
static const int INTER_REFS_PER_FRAME = 7;

// Reference frames used in this example encoder. The values index the
// ref_idx[] and reference[] arrays.
enum {
  SVC_LAST_FRAME = 0,
  SVC_LAST2_FRAME,
  SVC_LAST3_FRAME,
  SVC_GOLDEN_FRAME,
  SVC_BWDREF_FRAME,
  SVC_ALTREF2_FRAME,
  SVC_ALTREF_FRAME
};
567
568static int read_frame(struct AvxInputContext *input_ctx, aom_image_t *img) {
569 FILE *f = input_ctx->file;
570 y4m_input *y4m = &input_ctx->y4m;
571 int shortread = 0;
572
573 if (input_ctx->file_type == FILE_TYPE_Y4M) {
574 if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0;
575 } else {
576 shortread = read_yuv_frame(input_ctx, img);
577 }
578
579 return !shortread;
580}
581
582static void close_input_file(struct AvxInputContext *input) {
583 fclose(input->file);
584 if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m);
585}
586
587// Note: these rate control metrics assume only 1 key frame in the
588// sequence (i.e., first frame only). So for temporal pattern# 7
589// (which has key frame for every frame on base layer), the metrics
590// computation will be off/wrong.
591// TODO(marpan): Update these metrics to account for multiple key frames
592// in the stream.
593static void set_rate_control_metrics(struct RateControlMetrics *rc,
594 double framerate, int ss_number_layers,
595 int ts_number_layers) {
596 int ts_rate_decimator[AOM_MAX_TS_LAYERS] = { 1 };
597 ts_rate_decimator[0] = 1;
598 if (ts_number_layers == 2) {
599 ts_rate_decimator[0] = 2;
600 ts_rate_decimator[1] = 1;
601 }
602 if (ts_number_layers == 3) {
603 ts_rate_decimator[0] = 4;
604 ts_rate_decimator[1] = 2;
605 ts_rate_decimator[2] = 1;
606 }
607 // Set the layer (cumulative) framerate and the target layer (non-cumulative)
608 // per-frame-bandwidth, for the rate control encoding stats below.
609 for (int sl = 0; sl < ss_number_layers; ++sl) {
610 int i = sl * ts_number_layers;
611 rc->layer_framerate[0] = framerate / ts_rate_decimator[0];
612 rc->layer_pfb[i] =
613 1000.0 * rc->layer_target_bitrate[i] / rc->layer_framerate[0];
614 for (int tl = 0; tl < ts_number_layers; ++tl) {
615 i = sl * ts_number_layers + tl;
616 if (tl > 0) {
617 rc->layer_framerate[tl] = framerate / ts_rate_decimator[tl];
618 rc->layer_pfb[i] =
619 1000.0 *
620 (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
621 (rc->layer_framerate[tl] - rc->layer_framerate[tl - 1]);
622 }
623 rc->layer_input_frames[tl] = 0;
624 rc->layer_enc_frames[tl] = 0;
625 rc->layer_encoding_bitrate[i] = 0.0;
626 rc->layer_avg_frame_size[i] = 0.0;
627 rc->layer_avg_rate_mismatch[i] = 0.0;
628 }
629 }
630 rc->window_count = 0;
631 rc->window_size = 15;
632 rc->avg_st_encoding_bitrate = 0.0;
633 rc->variance_st_encoding_bitrate = 0.0;
634}
635
636static void printout_rate_control_summary(struct RateControlMetrics *rc,
637 int frame_cnt, int ss_number_layers,
638 int ts_number_layers) {
639 int tot_num_frames = 0;
640 double perc_fluctuation = 0.0;
641 printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
642 printf("Rate control layer stats for %d layer(s):\n\n", ts_number_layers);
643 for (int sl = 0; sl < ss_number_layers; ++sl) {
644 tot_num_frames = 0;
645 for (int tl = 0; tl < ts_number_layers; ++tl) {
646 int i = sl * ts_number_layers + tl;
647 const int num_dropped =
648 tl > 0 ? rc->layer_input_frames[tl] - rc->layer_enc_frames[tl]
649 : rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] - 1;
650 tot_num_frames += rc->layer_input_frames[tl];
651 rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[tl] *
652 rc->layer_encoding_bitrate[i] /
653 tot_num_frames;
654 rc->layer_avg_frame_size[i] =
655 rc->layer_avg_frame_size[i] / rc->layer_enc_frames[tl];
656 rc->layer_avg_rate_mismatch[i] =
657 100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[tl];
658 printf("For layer#: %d %d \n", sl, tl);
659 printf("Bitrate (target vs actual): %d %f\n", rc->layer_target_bitrate[i],
660 rc->layer_encoding_bitrate[i]);
661 printf("Average frame size (target vs actual): %f %f\n", rc->layer_pfb[i],
662 rc->layer_avg_frame_size[i]);
663 printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[i]);
664 printf(
665 "Number of input frames, encoded (non-key) frames, "
666 "and perc dropped frames: %d %d %f\n",
667 rc->layer_input_frames[tl], rc->layer_enc_frames[tl],
668 100.0 * num_dropped / rc->layer_input_frames[tl]);
669 printf("\n");
670 }
671 }
672 rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
673 rc->variance_st_encoding_bitrate =
674 rc->variance_st_encoding_bitrate / rc->window_count -
675 (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
676 perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
677 rc->avg_st_encoding_bitrate;
678 printf("Short-time stats, for window of %d frames:\n", rc->window_size);
679 printf("Average, rms-variance, and percent-fluct: %f %f %f\n",
680 rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
681 perc_fluctuation);
682 if (frame_cnt - 1 != tot_num_frames)
683 die("Error: Number of input frames not equal to output!\n");
684}
685
686// Layer pattern configuration.
687static void set_layer_pattern(
688 int layering_mode, int superframe_cnt, aom_svc_layer_id_t *layer_id,
689 aom_svc_ref_frame_config_t *ref_frame_config,
690 aom_svc_ref_frame_comp_pred_t *ref_frame_comp_pred, int *use_svc_control,
691 int spatial_layer_id, int is_key_frame, int ksvc_mode, int speed) {
692 // Setting this flag to 1 enables simplex example of
693 // RPS (Reference Picture Selection) for 1 layer.
694 int use_rps_example = 0;
695 int i;
696 int enable_longterm_temporal_ref = 1;
697 int shift = (layering_mode == 8) ? 2 : 0;
698 int simulcast_mode = (layering_mode == 11);
699 *use_svc_control = 1;
700 layer_id->spatial_layer_id = spatial_layer_id;
701 int lag_index = 0;
702 int base_count = superframe_cnt >> 2;
703 ref_frame_comp_pred->use_comp_pred[0] = 0; // GOLDEN_LAST
704 ref_frame_comp_pred->use_comp_pred[1] = 0; // LAST2_LAST
705 ref_frame_comp_pred->use_comp_pred[2] = 0; // ALTREF_LAST
706 // Set the reference map buffer idx for the 7 references:
707 // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
708 // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
709 for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = i;
710 for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->reference[i] = 0;
711 for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
712
713 if (ksvc_mode) {
714 // Same pattern as case 9, but the reference strucutre will be constrained
715 // below.
716 layering_mode = 9;
717 }
718 switch (layering_mode) {
719 case 0:
720 if (use_rps_example == 0) {
721 // 1-layer: update LAST on every frame, reference LAST.
722 layer_id->temporal_layer_id = 0;
723 layer_id->spatial_layer_id = 0;
724 ref_frame_config->refresh[0] = 1;
725 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
726 } else {
727 // Pattern of 2 references (ALTREF and GOLDEN) trailing
728 // LAST by 4 and 8 frames, with some switching logic to
729 // sometimes only predict from the longer-term reference
730 //(golden here). This is simple example to test RPS
731 // (reference picture selection).
732 int last_idx = 0;
733 int last_idx_refresh = 0;
734 int gld_idx = 0;
735 int alt_ref_idx = 0;
736 int lag_alt = 4;
737 int lag_gld = 8;
738 layer_id->temporal_layer_id = 0;
739 layer_id->spatial_layer_id = 0;
740 int sh = 8; // slots 0 - 7.
741 // Moving index slot for last: 0 - (sh - 1)
742 if (superframe_cnt > 1) last_idx = (superframe_cnt - 1) % sh;
743 // Moving index for refresh of last: one ahead for next frame.
744 last_idx_refresh = superframe_cnt % sh;
745 // Moving index for gld_ref, lag behind current by lag_gld
746 if (superframe_cnt > lag_gld) gld_idx = (superframe_cnt - lag_gld) % sh;
747 // Moving index for alt_ref, lag behind LAST by lag_alt frames.
748 if (superframe_cnt > lag_alt)
749 alt_ref_idx = (superframe_cnt - lag_alt) % sh;
750 // Set the ref_idx.
751 // Default all references to slot for last.
752 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
753 ref_frame_config->ref_idx[i] = last_idx;
754 // Set the ref_idx for the relevant references.
755 ref_frame_config->ref_idx[SVC_LAST_FRAME] = last_idx;
756 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = last_idx_refresh;
757 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = gld_idx;
758 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = alt_ref_idx;
759 // Refresh this slot, which will become LAST on next frame.
760 ref_frame_config->refresh[last_idx_refresh] = 1;
761 // Reference LAST, ALTREF, and GOLDEN
762 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
763 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
764 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
765 // Switch to only GOLDEN every 300 frames.
766 if (superframe_cnt % 200 == 0 && superframe_cnt > 0) {
767 ref_frame_config->reference[SVC_LAST_FRAME] = 0;
768 ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
769 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
770 // Test if the long-term is LAST instead, this is just a renaming
771 // but its tests if encoder behaves the same, whether its
772 // LAST or GOLDEN.
773 if (superframe_cnt % 400 == 0 && superframe_cnt > 0) {
774 ref_frame_config->ref_idx[SVC_LAST_FRAME] = gld_idx;
775 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
776 ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
777 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
778 }
779 }
780 }
781 break;
782 case 1:
783 // 2-temporal layer.
784 // 1 3 5
785 // 0 2 4
786 // Keep golden fixed at slot 3.
787 base_count = superframe_cnt >> 1;
788 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
789 // Cyclically refresh slots 5, 6, 7, for lag alt ref.
790 lag_index = 5;
791 if (base_count > 0) {
792 lag_index = 5 + (base_count % 3);
793 if (superframe_cnt % 2 != 0) lag_index = 5 + ((base_count + 1) % 3);
794 }
795 // Set the altref slot to lag_index.
796 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
797 if (superframe_cnt % 2 == 0) {
798 layer_id->temporal_layer_id = 0;
799 // Update LAST on layer 0, reference LAST.
800 ref_frame_config->refresh[0] = 1;
801 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
802 // Refresh lag_index slot, needed for lagging golen.
803 ref_frame_config->refresh[lag_index] = 1;
804 // Refresh GOLDEN every x base layer frames.
805 if (base_count % 32 == 0) ref_frame_config->refresh[3] = 1;
806 } else {
807 layer_id->temporal_layer_id = 1;
808 // No updates on layer 1, reference LAST (TL0).
809 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
810 }
811 // Always reference golden and altref on TL0.
812 if (layer_id->temporal_layer_id == 0) {
813 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
814 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
815 }
816 break;
817 case 2:
818 // 3-temporal layer:
819 // 1 3 5 7
820 // 2 6
821 // 0 4 8
822 if (superframe_cnt % 4 == 0) {
823 // Base layer.
824 layer_id->temporal_layer_id = 0;
825 // Update LAST on layer 0, reference LAST.
826 ref_frame_config->refresh[0] = 1;
827 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
828 } else if ((superframe_cnt - 1) % 4 == 0) {
829 layer_id->temporal_layer_id = 2;
830 // First top layer: no updates, only reference LAST (TL0).
831 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
832 } else if ((superframe_cnt - 2) % 4 == 0) {
833 layer_id->temporal_layer_id = 1;
834 // Middle layer (TL1): update LAST2, only reference LAST (TL0).
835 ref_frame_config->refresh[1] = 1;
836 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
837 } else if ((superframe_cnt - 3) % 4 == 0) {
838 layer_id->temporal_layer_id = 2;
839 // Second top layer: no updates, only reference LAST.
840 // Set buffer idx for LAST to slot 1, since that was the slot
841 // updated in previous frame. So LAST is TL1 frame.
842 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
843 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
844 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
845 }
846 break;
847 case 3:
848 // 3 TL, same as above, except allow for predicting
849 // off 2 more references (GOLDEN and ALTREF), with
850 // GOLDEN updated periodically, and ALTREF lagging from
851 // LAST from ~4 frames. Both GOLDEN and ALTREF
852 // can only be updated on base temporal layer.
853
854 // Keep golden fixed at slot 3.
855 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
856 // Cyclically refresh slots 5, 6, 7, for lag altref.
857 lag_index = 5;
858 if (base_count > 0) {
859 lag_index = 5 + (base_count % 3);
860 if (superframe_cnt % 4 != 0) lag_index = 5 + ((base_count + 1) % 3);
861 }
862 // Set the altref slot to lag_index.
863 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
864 if (superframe_cnt % 4 == 0) {
865 // Base layer.
866 layer_id->temporal_layer_id = 0;
867 // Update LAST on layer 0, reference LAST.
868 ref_frame_config->refresh[0] = 1;
869 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
870 // Refresh GOLDEN every x ~10 base layer frames.
871 if (base_count % 10 == 0) ref_frame_config->refresh[3] = 1;
872 // Refresh lag_index slot, needed for lagging altref.
873 ref_frame_config->refresh[lag_index] = 1;
874 } else if ((superframe_cnt - 1) % 4 == 0) {
875 layer_id->temporal_layer_id = 2;
876 // First top layer: no updates, only reference LAST (TL0).
877 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
878 } else if ((superframe_cnt - 2) % 4 == 0) {
879 layer_id->temporal_layer_id = 1;
880 // Middle layer (TL1): update LAST2, only reference LAST (TL0).
881 ref_frame_config->refresh[1] = 1;
882 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
883 } else if ((superframe_cnt - 3) % 4 == 0) {
884 layer_id->temporal_layer_id = 2;
885 // Second top layer: no updates, only reference LAST.
886 // Set buffer idx for LAST to slot 1, since that was the slot
887 // updated in previous frame. So LAST is TL1 frame.
888 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
889 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
890 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
891 }
892 // Every frame can reference GOLDEN AND ALTREF.
893 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
894 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
895 // Allow for compound prediction for LAST-ALTREF and LAST-GOLDEN.
896 if (speed >= 7) {
897 ref_frame_comp_pred->use_comp_pred[2] = 1;
898 ref_frame_comp_pred->use_comp_pred[0] = 1;
899 }
900 break;
901 case 4:
902 // 3-temporal layer: but middle layer updates GF, so 2nd TL2 will
903 // only reference GF (not LAST). Other frames only reference LAST.
904 // 1 3 5 7
905 // 2 6
906 // 0 4 8
907 if (superframe_cnt % 4 == 0) {
908 // Base layer.
909 layer_id->temporal_layer_id = 0;
910 // Update LAST on layer 0, only reference LAST.
911 ref_frame_config->refresh[0] = 1;
912 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
913 } else if ((superframe_cnt - 1) % 4 == 0) {
914 layer_id->temporal_layer_id = 2;
915 // First top layer: no updates, only reference LAST (TL0).
916 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
917 } else if ((superframe_cnt - 2) % 4 == 0) {
918 layer_id->temporal_layer_id = 1;
919 // Middle layer (TL1): update GF, only reference LAST (TL0).
920 ref_frame_config->refresh[3] = 1;
921 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
922 } else if ((superframe_cnt - 3) % 4 == 0) {
923 layer_id->temporal_layer_id = 2;
924 // Second top layer: no updates, only reference GF.
925 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
926 }
927 break;
928 case 5:
929 // 2 spatial layers, 1 temporal.
930 layer_id->temporal_layer_id = 0;
931 if (layer_id->spatial_layer_id == 0) {
932 // Reference LAST, update LAST.
933 ref_frame_config->refresh[0] = 1;
934 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
935 } else if (layer_id->spatial_layer_id == 1) {
936 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
937 // and GOLDEN to slot 0. Update slot 1 (LAST).
938 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
939 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 0;
940 ref_frame_config->refresh[1] = 1;
941 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
942 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
943 }
944 break;
945 case 6:
946 // 3 spatial layers, 1 temporal.
947 // Note for this case, we set the buffer idx for all references to be
948 // either LAST or GOLDEN, which are always valid references, since decoder
949 // will check if any of the 7 references is valid scale in
950 // valid_ref_frame_size().
951 layer_id->temporal_layer_id = 0;
952 if (layer_id->spatial_layer_id == 0) {
953 // Reference LAST, update LAST. Set all buffer_idx to 0.
954 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
955 ref_frame_config->ref_idx[i] = 0;
956 ref_frame_config->refresh[0] = 1;
957 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
958 } else if (layer_id->spatial_layer_id == 1) {
959 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
960 // and GOLDEN (and all other refs) to slot 0.
961 // Update slot 1 (LAST).
962 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
963 ref_frame_config->ref_idx[i] = 0;
964 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
965 ref_frame_config->refresh[1] = 1;
966 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
967 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
968 } else if (layer_id->spatial_layer_id == 2) {
969 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2
970 // and GOLDEN (and all other refs) to slot 1.
971 // Update slot 2 (LAST).
972 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
973 ref_frame_config->ref_idx[i] = 1;
974 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
975 ref_frame_config->refresh[2] = 1;
976 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
977 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
978 // For 3 spatial layer case: allow for top spatial layer to use
979 // additional temporal reference. Update every 10 frames.
980 if (enable_longterm_temporal_ref) {
981 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
982 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
983 if (base_count % 10 == 0)
984 ref_frame_config->refresh[REF_FRAMES - 1] = 1;
985 }
986 }
987 break;
988 case 7:
989 // 2 spatial and 3 temporal layer.
990 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
991 if (superframe_cnt % 4 == 0) {
992 // Base temporal layer
993 layer_id->temporal_layer_id = 0;
994 if (layer_id->spatial_layer_id == 0) {
995 // Reference LAST, update LAST
996 // Set all buffer_idx to 0
997 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
998 ref_frame_config->ref_idx[i] = 0;
999 ref_frame_config->refresh[0] = 1;
1000 } else if (layer_id->spatial_layer_id == 1) {
1001 // Reference LAST and GOLDEN.
1002 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1003 ref_frame_config->ref_idx[i] = 0;
1004 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1005 ref_frame_config->refresh[1] = 1;
1006 }
1007 } else if ((superframe_cnt - 1) % 4 == 0) {
1008 // First top temporal enhancement layer.
1009 layer_id->temporal_layer_id = 2;
1010 if (layer_id->spatial_layer_id == 0) {
1011 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1012 ref_frame_config->ref_idx[i] = 0;
1013 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1014 ref_frame_config->refresh[3] = 1;
1015 } else if (layer_id->spatial_layer_id == 1) {
1016 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1017 // GOLDEN (and all other refs) to slot 3.
1018 // No update.
1019 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1020 ref_frame_config->ref_idx[i] = 3;
1021 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1022 }
1023 } else if ((superframe_cnt - 2) % 4 == 0) {
1024 // Middle temporal enhancement layer.
1025 layer_id->temporal_layer_id = 1;
1026 if (layer_id->spatial_layer_id == 0) {
1027 // Reference LAST.
1028 // Set all buffer_idx to 0.
1029 // Set GOLDEN to slot 5 and update slot 5.
1030 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1031 ref_frame_config->ref_idx[i] = 0;
1032 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
1033 ref_frame_config->refresh[5 - shift] = 1;
1034 } else if (layer_id->spatial_layer_id == 1) {
1035 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1036 // GOLDEN (and all other refs) to slot 5.
1037 // Set LAST3 to slot 6 and update slot 6.
1038 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1039 ref_frame_config->ref_idx[i] = 5 - shift;
1040 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1041 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
1042 ref_frame_config->refresh[6 - shift] = 1;
1043 }
1044 } else if ((superframe_cnt - 3) % 4 == 0) {
1045 // Second top temporal enhancement layer.
1046 layer_id->temporal_layer_id = 2;
1047 if (layer_id->spatial_layer_id == 0) {
1048 // Set LAST to slot 5 and reference LAST.
1049 // Set GOLDEN to slot 3 and update slot 3.
1050 // Set all other buffer_idx to 0.
1051 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1052 ref_frame_config->ref_idx[i] = 0;
1053 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
1054 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1055 ref_frame_config->refresh[3] = 1;
1056 } else if (layer_id->spatial_layer_id == 1) {
1057 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
1058 // GOLDEN to slot 3. No update.
1059 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1060 ref_frame_config->ref_idx[i] = 0;
1061 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
1062 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1063 }
1064 }
1065 break;
1066 case 8:
1067 // 3 spatial and 3 temporal layer.
1068 // Same as case 9 but with overlap in the buffer slot updates
1069 // (shift = 2). The slots 3 and 4 updated by first TL2 are
1070 // reused for update in TL1 superframe.
1071 // Note for this case, frame order hint must be disabled for
1072 // lower resolutions (operating points > 0) to be decodable.
1073 case 9:
1074 // 3 spatial and 3 temporal layer.
1075 // No overlap in buffer updates between TL2 and TL1.
1076 // TL2 updates slot 3 and 4, TL1 updates 5, 6, 7.
1077 // Set the references via the svc_ref_frame_config control.
1078 // Always reference LAST.
1079 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1080 if (superframe_cnt % 4 == 0) {
1081 // Base temporal layer.
1082 layer_id->temporal_layer_id = 0;
1083 if (layer_id->spatial_layer_id == 0) {
1084 // Reference LAST, update LAST.
1085 // Set all buffer_idx to 0.
1086 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1087 ref_frame_config->ref_idx[i] = 0;
1088 ref_frame_config->refresh[0] = 1;
1089 } else if (layer_id->spatial_layer_id == 1) {
1090 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1091 // GOLDEN (and all other refs) to slot 0.
1092 // Update slot 1 (LAST).
1093 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1094 ref_frame_config->ref_idx[i] = 0;
1095 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1096 ref_frame_config->refresh[1] = 1;
1097 } else if (layer_id->spatial_layer_id == 2) {
1098 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1099 // GOLDEN (and all other refs) to slot 1.
1100 // Update slot 2 (LAST).
1101 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1102 ref_frame_config->ref_idx[i] = 1;
1103 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1104 ref_frame_config->refresh[2] = 1;
1105 }
1106 } else if ((superframe_cnt - 1) % 4 == 0) {
1107 // First top temporal enhancement layer.
1108 layer_id->temporal_layer_id = 2;
1109 if (layer_id->spatial_layer_id == 0) {
1110 // Reference LAST (slot 0).
1111 // Set GOLDEN to slot 3 and update slot 3.
1112 // Set all other buffer_idx to slot 0.
1113 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1114 ref_frame_config->ref_idx[i] = 0;
1115 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1116 ref_frame_config->refresh[3] = 1;
1117 } else if (layer_id->spatial_layer_id == 1) {
1118 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1119 // GOLDEN (and all other refs) to slot 3.
1120 // Set LAST2 to slot 4 and Update slot 4.
1121 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1122 ref_frame_config->ref_idx[i] = 3;
1123 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1124 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1125 ref_frame_config->refresh[4] = 1;
1126 } else if (layer_id->spatial_layer_id == 2) {
1127 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1128 // GOLDEN (and all other refs) to slot 4.
1129 // No update.
1130 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1131 ref_frame_config->ref_idx[i] = 4;
1132 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1133 }
1134 } else if ((superframe_cnt - 2) % 4 == 0) {
1135 // Middle temporal enhancement layer.
1136 layer_id->temporal_layer_id = 1;
1137 if (layer_id->spatial_layer_id == 0) {
1138 // Reference LAST.
1139 // Set all buffer_idx to 0.
1140 // Set GOLDEN to slot 5 and update slot 5.
1141 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1142 ref_frame_config->ref_idx[i] = 0;
1143 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
1144 ref_frame_config->refresh[5 - shift] = 1;
1145 } else if (layer_id->spatial_layer_id == 1) {
1146 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1147 // GOLDEN (and all other refs) to slot 5.
1148 // Set LAST3 to slot 6 and update slot 6.
1149 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1150 ref_frame_config->ref_idx[i] = 5 - shift;
1151 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1152 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
1153 ref_frame_config->refresh[6 - shift] = 1;
1154 } else if (layer_id->spatial_layer_id == 2) {
1155 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1156 // GOLDEN (and all other refs) to slot 6.
1157 // Set LAST3 to slot 7 and update slot 7.
1158 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1159 ref_frame_config->ref_idx[i] = 6 - shift;
1160 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1161 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 7 - shift;
1162 ref_frame_config->refresh[7 - shift] = 1;
1163 }
1164 } else if ((superframe_cnt - 3) % 4 == 0) {
1165 // Second top temporal enhancement layer.
1166 layer_id->temporal_layer_id = 2;
1167 if (layer_id->spatial_layer_id == 0) {
1168 // Set LAST to slot 5 and reference LAST.
1169 // Set GOLDEN to slot 3 and update slot 3.
1170 // Set all other buffer_idx to 0.
1171 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1172 ref_frame_config->ref_idx[i] = 0;
1173 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
1174 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1175 ref_frame_config->refresh[3] = 1;
1176 } else if (layer_id->spatial_layer_id == 1) {
1177 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
1178 // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4.
1179 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1180 ref_frame_config->ref_idx[i] = 0;
1181 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
1182 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1183 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1184 ref_frame_config->refresh[4] = 1;
1185 } else if (layer_id->spatial_layer_id == 2) {
1186 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 7,
1187 // GOLDEN to slot 4. No update.
1188 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1189 ref_frame_config->ref_idx[i] = 0;
1190 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 7 - shift;
1191 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 4;
1192 }
1193 }
1194 break;
1195 case 11:
1196 // Simulcast mode for 3 spatial and 3 temporal layers.
1197 // No inter-layer prediction, only prediction is temporal and single
1198 // reference (LAST).
1199 // No overlap in buffer slots between spatial layers. So for example,
1200 // SL0 only uses slots 0 and 1.
1201 // SL1 only uses slots 2 and 3.
1202 // SL2 only uses slots 4 and 5.
1203 // All 7 references for each inter-frame must only access buffer slots
1204 // for that spatial layer.
1205 // On key (super)frames: SL1 and SL2 must have no references set
1206 // and must refresh all the slots for that layer only (so 2 and 3
1207 // for SL1, 4 and 5 for SL2). The base SL0 will be labelled internally
1208 // as a Key frame (refresh all slots). SL1/SL2 will be labelled
1209 // internally as Intra-only frames that allow that stream to be decoded.
1210 // These conditions will allow for each spatial stream to be
1211 // independently decodeable.
1212
1213 // Initialize all references to 0 (don't use reference).
1214 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1215 ref_frame_config->reference[i] = 0;
1216 // Initialize as no refresh/update for all slots.
1217 for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
1218 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1219 ref_frame_config->ref_idx[i] = 0;
1220
1221 if (is_key_frame) {
1222 if (layer_id->spatial_layer_id == 0) {
1223 // Assign LAST/GOLDEN to slot 0/1.
1224 // Refresh slots 0 and 1 for SL0.
1225 // SL0: this will get set to KEY frame internally.
1226 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1227 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 1;
1228 ref_frame_config->refresh[0] = 1;
1229 ref_frame_config->refresh[1] = 1;
1230 } else if (layer_id->spatial_layer_id == 1) {
1231 // Assign LAST/GOLDEN to slot 2/3.
1232 // Refresh slots 2 and 3 for SL1.
1233 // This will get set to Intra-only frame internally.
1234 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1235 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1236 ref_frame_config->refresh[2] = 1;
1237 ref_frame_config->refresh[3] = 1;
1238 } else if (layer_id->spatial_layer_id == 2) {
1239 // Assign LAST/GOLDEN to slot 4/5.
1240 // Refresh slots 4 and 5 for SL2.
1241 // This will get set to Intra-only frame internally.
1242 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1243 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5;
1244 ref_frame_config->refresh[4] = 1;
1245 ref_frame_config->refresh[5] = 1;
1246 }
1247 } else if (superframe_cnt % 4 == 0) {
1248 // Base temporal layer: TL0
1249 layer_id->temporal_layer_id = 0;
1250 if (layer_id->spatial_layer_id == 0) { // SL0
1251 // Reference LAST. Assign all references to either slot
1252 // 0 or 1. Here we assign LAST to slot 0, all others to 1.
1253 // Update slot 0 (LAST).
1254 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1255 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1256 ref_frame_config->ref_idx[i] = 1;
1257 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1258 ref_frame_config->refresh[0] = 1;
1259 } else if (layer_id->spatial_layer_id == 1) { // SL1
1260 // Reference LAST. Assign all references to either slot
1261 // 2 or 3. Here we assign LAST to slot 2, all others to 3.
1262 // Update slot 2 (LAST).
1263 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1264 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1265 ref_frame_config->ref_idx[i] = 3;
1266 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1267 ref_frame_config->refresh[2] = 1;
1268 } else if (layer_id->spatial_layer_id == 2) { // SL2
1269 // Reference LAST. Assign all references to either slot
1270 // 4 or 5. Here we assign LAST to slot 4, all others to 5.
1271 // Update slot 4 (LAST).
1272 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1273 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1274 ref_frame_config->ref_idx[i] = 5;
1275 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1276 ref_frame_config->refresh[4] = 1;
1277 }
1278 } else if ((superframe_cnt - 1) % 4 == 0) {
1279 // First top temporal enhancement layer: TL2
1280 layer_id->temporal_layer_id = 2;
1281 if (layer_id->spatial_layer_id == 0) { // SL0
1282 // Reference LAST (slot 0). Assign other references to slot 1.
1283 // No update/refresh on any slots.
1284 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1285 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1286 ref_frame_config->ref_idx[i] = 1;
1287 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1288 } else if (layer_id->spatial_layer_id == 1) { // SL1
1289 // Reference LAST (slot 2). Assign other references to slot 3.
1290 // No update/refresh on any slots.
1291 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1292 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1293 ref_frame_config->ref_idx[i] = 3;
1294 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1295 } else if (layer_id->spatial_layer_id == 2) { // SL2
1296 // Reference LAST (slot 4). Assign other references to slot 5.
1297 // No update/refresh on any slots.
1298 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1299 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1300 ref_frame_config->ref_idx[i] = 5;
1301 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1302 }
1303 } else if ((superframe_cnt - 2) % 4 == 0) {
1304 // Middle temporal enhancement layer: TL1
1305 layer_id->temporal_layer_id = 1;
1306 if (layer_id->spatial_layer_id == 0) { // SL0
1307 // Reference LAST (slot 0).
1308 // Set GOLDEN to slot 1 and update slot 1.
1309 // This will be used as reference for next TL2.
1310 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1311 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1312 ref_frame_config->ref_idx[i] = 1;
1313 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1314 ref_frame_config->refresh[1] = 1;
1315 } else if (layer_id->spatial_layer_id == 1) { // SL1
1316 // Reference LAST (slot 2).
1317 // Set GOLDEN to slot 3 and update slot 3.
1318 // This will be used as reference for next TL2.
1319 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1320 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1321 ref_frame_config->ref_idx[i] = 3;
1322 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1323 ref_frame_config->refresh[3] = 1;
1324 } else if (layer_id->spatial_layer_id == 2) { // SL2
1325 // Reference LAST (slot 4).
1326 // Set GOLDEN to slot 5 and update slot 5.
1327 // This will be used as reference for next TL2.
1328 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1329 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1330 ref_frame_config->ref_idx[i] = 5;
1331 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1332 ref_frame_config->refresh[5] = 1;
1333 }
1334 } else if ((superframe_cnt - 3) % 4 == 0) {
1335 // Second top temporal enhancement layer: TL2
1336 layer_id->temporal_layer_id = 2;
1337 if (layer_id->spatial_layer_id == 0) { // SL0
1338 // Reference LAST (slot 1). Assign other references to slot 0.
1339 // No update/refresh on any slots.
1340 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1341 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1342 ref_frame_config->ref_idx[i] = 0;
1343 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1344 } else if (layer_id->spatial_layer_id == 1) { // SL1
1345 // Reference LAST (slot 3). Assign other references to slot 2.
1346 // No update/refresh on any slots.
1347 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1348 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1349 ref_frame_config->ref_idx[i] = 2;
1350 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 3;
1351 } else if (layer_id->spatial_layer_id == 2) { // SL2
1352 // Reference LAST (slot 5). Assign other references to slot 4.
1353 // No update/refresh on any slots.
1354 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1355 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1356 ref_frame_config->ref_idx[i] = 4;
1357 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5;
1358 }
1359 }
1360 if (!simulcast_mode && layer_id->spatial_layer_id > 0) {
1361 // Always reference GOLDEN (inter-layer prediction).
1362 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
1363 if (ksvc_mode) {
1364 // KSVC: only keep the inter-layer reference (GOLDEN) for
1365 // superframes whose base is key.
1366 if (!is_key_frame) ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
1367 }
1368 if (is_key_frame && layer_id->spatial_layer_id > 1) {
1369 // On superframes whose base is key: remove LAST to avoid prediction
1370 // off layer two levels below.
1371 ref_frame_config->reference[SVC_LAST_FRAME] = 0;
1372 }
1373 }
1374 // For 3 spatial layer case 8 (where there is free buffer slot):
1375 // allow for top spatial layer to use additional temporal reference.
1376 // Additional reference is only updated on base temporal layer, every
1377 // 10 TL0 frames here.
1378 if (!simulcast_mode && enable_longterm_temporal_ref &&
1379 layer_id->spatial_layer_id == 2 && layering_mode == 8) {
1380 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
1381 if (!is_key_frame) ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
1382 if (base_count % 10 == 0 && layer_id->temporal_layer_id == 0)
1383 ref_frame_config->refresh[REF_FRAMES - 1] = 1;
1384 }
1385 break;
1386 default: assert(0); die("Error: Unsupported temporal layering mode!\n");
1387 }
1388}
1389
1390static void write_literal(struct aom_write_bit_buffer *wb, uint32_t data,
1391 uint8_t bits, uint32_t offset = 0) {
1392 if (bits > 32) {
1393 die("Invalid bits value %d > 32\n", bits);
1394 }
1395 const uint32_t max = static_cast<uint32_t>(((uint64_t)1 << bits) - 1);
1396 if (data < offset || (data - offset) > max) {
1397 die("Invalid data, value %u out of range [%u, %" PRIu64 "]\n", data, offset,
1398 (uint64_t)max + offset);
1399 }
1400 aom_wb_write_unsigned_literal(wb, data - offset, bits);
1401}
1402
1403static void write_depth_representation_element(
1404 struct aom_write_bit_buffer *buffer,
1405 const std::pair<libaom_examples::DepthRepresentationElement, bool>
1406 &element) {
1407 if (!element.second) {
1408 return;
1409 }
1410 write_literal(buffer, element.first.sign_flag, 1);
1411 write_literal(buffer, element.first.exponent, 7);
1412 if (element.first.mantissa_len == 0 || element.first.mantissa_len > 32) {
1413 die("Invalid mantissan_len %d\n", element.first.mantissa_len);
1414 }
1415 write_literal(buffer, element.first.mantissa_len - 1, 5);
1416 write_literal(buffer, element.first.mantissa, element.first.mantissa_len);
1417}
1418
1419static void write_color_properties(
1420 struct aom_write_bit_buffer *buffer,
1421 const std::pair<libaom_examples::ColorProperties, bool> &color_properties) {
1422 write_literal(buffer, color_properties.second, 1);
1423 if (color_properties.second) {
1424 write_literal(buffer, color_properties.first.color_range, 1);
1425 write_literal(buffer, color_properties.first.color_primaries, 8);
1426 write_literal(buffer, color_properties.first.transfer_characteristics, 8);
1427 write_literal(buffer, color_properties.first.matrix_coefficients, 8);
1428 } else {
1429 write_literal(buffer, 0, 1); // reserved_1bit
1430 }
1431}
1432
// Serializes `multilayer` into a bit buffer and attaches the resulting bytes
// to `frame` through aom_img_add_metadata() using metadata type 33.
//
//   frame:      image that receives the metadata.
//   multilayer: description of the layers to serialize.
//
// Calls die() when there are no layers, when there are more than
// MAX_NUM_SPATIAL_LAYERS layers, when a layer's size field cannot be encoded
// in the reserved bytes, or on the "Failed to add metadata" path at the end.
// NOTE(review): the last argument(s) of the aom_img_add_metadata() call are
// not visible in this listing (a source line is missing) - confirm against
// the real file.
1433 static void add_multilayer_metadata(
1434 aom_image_t *frame, const libaom_examples::MultilayerMetadata &multilayer) {
1435 // Large enough buffer for the multilayer metadata.
1436 // Each layer's metadata is less than 100 bytes and there are at most 4
1437 // layers.
1438 std::vector<uint8_t> data(1024);
1439 struct aom_write_bit_buffer buffer = { data.data(), 0 };
1440
// First byte: use_case (6 bits) followed by the layer count minus one
// (2 bits).
1441 write_literal(&buffer, multilayer.use_case, 6);
1442 if (multilayer.layers.empty()) {
1443 die("Invalid multilayer metadata, no layers found\n");
1444 } else if (multilayer.layers.size() > MAX_NUM_SPATIAL_LAYERS) {
1445 die("Invalid multilayer metadata, too many layers (max is %d)\n",
1446 MAX_NUM_SPATIAL_LAYERS);
1447 }
1448 write_literal(&buffer, (int)multilayer.layers.size() - 1, 2);
1449 assert(buffer.bit_offset % 8 == 0);
1450 for (size_t i = 0; i < multilayer.layers.size(); ++i) {
1451 const libaom_examples::LayerMetadata &layer = multilayer.layers[i];
1452 // Alpha info with segmentation with labels can be up to about 66k bytes,
1453 // which requires 3 bytes to encode in leb128.
1454 const int bytes_reserved_for_size = 3;
1455 // Placeholder for layer_metadata_size which will be written later.
1456 write_literal(&buffer, 0, bytes_reserved_for_size * 8);
// Bit position where this layer's payload starts, i.e. just after the
// reserved size bytes. Used below to compute the payload size.
1457 const uint32_t metadata_start = buffer.bit_offset;
// Fixed per-layer header: 2 + 5 + 1 + 3 + 2 + 3 + 2 + 4 = 22 bits.
1458 write_literal(&buffer, (int)i, 2); // ml_spatial_id
1459 write_literal(&buffer, layer.layer_type, 5);
1460 write_literal(&buffer, layer.luma_plane_only_flag, 1);
1461 write_literal(&buffer, layer.layer_view_type, 3);
1462 write_literal(&buffer, layer.group_id, 2);
1463 write_literal(&buffer, layer.layer_dependency_idc, 3);
1464 write_literal(&buffer, layer.layer_metadata_scope, 2);
1465 write_literal(&buffer, 0, 4); // ml_reserved_4bits
1466
// Only layers after the first carry a color description; the first layer
// writes 2 reserved bits instead. Either way the header ends on a byte
// boundary, which the assert below checks.
1467 if (i > 0) {
1468 write_color_properties(&buffer, layer.layer_color_description);
1469 } else {
1470 write_literal(&buffer, 0, 2); // ml_reserved_2bits
1471 }
1472 assert(buffer.bit_offset % 8 == 0);
1473
// Alpha layers whose metadata scope is at least SCOPE_GLOBAL carry the global
// alpha information.
1474 if (layer.layer_type == libaom_examples::MULTILAYER_LAYER_TYPE_ALPHA &&
1475 layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) {
1476 const libaom_examples::AlphaInformation &alpha_info =
1477 layer.global_alpha_info;
1478 write_literal(&buffer, alpha_info.alpha_use_idc, 2);
1479 write_literal(&buffer, alpha_info.alpha_simple_flag, 1);
1480 if (!alpha_info.alpha_simple_flag) {
// alpha_bit_depth is written with an offset of 8, so the 3-bit field holds
// alpha_bit_depth - 8.
1481 write_literal(&buffer, alpha_info.alpha_bit_depth, 3, /*offset=*/8);
1482 write_literal(&buffer, alpha_info.alpha_clip_idc, 2);
1483 write_literal(&buffer, alpha_info.alpha_incr_flag, 1);
// The transparent and opaque values are each written using
// alpha_bit_depth + 1 bits.
1484 write_literal(&buffer, alpha_info.alpha_transparent_value,
1485 alpha_info.alpha_bit_depth + 1);
1486 write_literal(&buffer, alpha_info.alpha_opaque_value,
1487 alpha_info.alpha_bit_depth + 1);
1488 if (buffer.bit_offset % 8 != 0) {
1489 // ai_byte_alignment_bits
1490 write_literal(&buffer, 0, 8 - (buffer.bit_offset % 8));
1491 }
1492 assert(buffer.bit_offset % 8 == 0);
1493
1494 write_literal(&buffer, 0, 6); // ai_reserved_6bits
1495 write_color_properties(&buffer, alpha_info.alpha_color_description);
1496 } else {
1497 write_literal(&buffer, 0, 5); // ai_reserved_5bits
1498 }
1499
1500 assert(buffer.bit_offset % 8 == 0);
// Depth layers whose metadata scope is at least SCOPE_GLOBAL carry the global
// depth information.
1501 } else if (layer.layer_type ==
1502 libaom_examples::MULTILAYER_LAYER_TYPE_DEPTH &&
1503 layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) {
1504 const libaom_examples::DepthInformation &depth_info =
1505 layer.global_depth_info;
// Four presence flags, one per optional element written further down.
1506 write_literal(&buffer, depth_info.z_near.second, 1);
1507 write_literal(&buffer, depth_info.z_far.second, 1);
1508 write_literal(&buffer, depth_info.d_min.second, 1);
1509 write_literal(&buffer, depth_info.d_max.second, 1);
1510 write_literal(&buffer, depth_info.depth_representation_type, 4);
// disparity_ref_view_id is only written when d_min or d_max is present.
1511 if (depth_info.d_min.second || depth_info.d_max.second) {
1512 write_literal(&buffer, depth_info.disparity_ref_view_id, 2);
1513 }
// Each call writes nothing when the corresponding element is absent.
1514 write_depth_representation_element(&buffer, depth_info.z_near);
1515 write_depth_representation_element(&buffer, depth_info.z_far);
1516 write_depth_representation_element(&buffer, depth_info.d_min);
1517 write_depth_representation_element(&buffer, depth_info.d_max);
// Pad with zero bits up to the next byte boundary.
1518 if (buffer.bit_offset % 8 != 0) {
1519 write_literal(&buffer, 0, 8 - (buffer.bit_offset % 8));
1520 }
1521 assert(buffer.bit_offset % 8 == 0);
1522 }
1523
1524 assert(buffer.bit_offset % 8 == 0);
1525
// Back-patch the size placeholder written at the start of this layer:
// metadata_size_bytes counts the payload bytes after the placeholder, and
// size_pos is the byte index of the placeholder itself.
// NOTE(review): size_pos is a uint8_t, so this assumes the placeholder starts
// before byte 256 of the buffer - confirm that holds for the largest payloads.
1526 const int metadata_size_bytes = (buffer.bit_offset - metadata_start) / 8;
1527 const uint8_t size_pos = metadata_start / 8 - bytes_reserved_for_size;
1528 size_t coded_size;
1529 if (aom_uleb_encode_fixed_size(metadata_size_bytes, bytes_reserved_for_size,
1530 bytes_reserved_for_size,
1531 &buffer.bit_buffer[size_pos], &coded_size)) {
1532 // Need to increase bytes_reserved_for_size in the code above.
1533 die("Error: Failed to write metadata size\n");
1534 }
1535 }
1536 assert(buffer.bit_offset % 8 == 0);
// Attach the serialized bytes (bit_offset / 8 of them) to the frame.
1537 if (aom_img_add_metadata(frame, 33 /*METADATA_TYPE_MULTILAYER*/,
1538 buffer.bit_buffer, buffer.bit_offset / 8,
1540 die("Error: Failed to add metadata\n");
1541 }
1542 }
1543
1544 #if CONFIG_AV1_DECODER
1545 // Returns whether there is a mismatch between the encoder's new frame and the
1546 // decoder's new frame.
//
//   encoder, decoder: codec contexts whose newest frames are compared.
//   frames_out:       frame number, used only in the mismatch message.
//
// Returns 1 when the two images differ (after printing a description of the
// mismatch to stderr) and 0 otherwise. Both images are released with
// aom_img_free() before returning.
1547 static int test_decode(aom_codec_ctx_t *encoder, aom_codec_ctx_t *decoder,
1548 const int frames_out) {
1549 aom_image_t enc_img, dec_img;
1550 int mismatch = 0;
1551
1552 /* Get the internal new frame */
// NOTE(review): the statements that fill enc_img and dec_img from `encoder`
// and `decoder` are not visible in this listing (source lines are missing) -
// confirm against the real file.
1555
1556 #if CONFIG_AV1_HIGHBITDEPTH
// Taken only when exactly one of the two images has the high-bitdepth flag
// set. Whichever image has the flag is then passed through
// aom_img_truncate_16_to_8() into a second image whose format has the flag
// removed and whose display size is the same, and that second image replaces
// it.
// NOTE(review): the call that sets up enc_hbd_img / dec_hbd_img is not
// visible in this listing; only its arguments are - confirm against the real
// file.
1557 if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) !=
1558 (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) {
1559 if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1560 aom_image_t enc_hbd_img;
1562 &enc_hbd_img,
1563 static_cast<aom_img_fmt_t>(enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1564 enc_img.d_w, enc_img.d_h, 16);
1565 aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img);
1566 enc_img = enc_hbd_img;
1567 }
1568 if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1569 aom_image_t dec_hbd_img;
1571 &dec_hbd_img,
1572 static_cast<aom_img_fmt_t>(dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1573 dec_img.d_w, dec_img.d_h, 16);
1574 aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img);
1575 dec_img = dec_hbd_img;
1576 }
1577 }
1578 #endif
1579
// A zero result from aom_compare_img() is treated as "images differ".
1580 if (!aom_compare_img(&enc_img, &dec_img)) {
// y, u and v each receive four ints from the find-mismatch helper. They are
// printed below as "[a, b] {c/d}" per plane.
1581 int y[4], u[4], v[4];
1582 #if CONFIG_AV1_HIGHBITDEPTH
// Use the high-bitdepth variant when the (possibly converted) encoder image
// still has the high-bitdepth flag set.
1583 if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1584 aom_find_mismatch_high(&enc_img, &dec_img, y, u, v);
1585 } else {
1586 aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1587 }
1588 #else
1589 aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1590 #endif
1591 fprintf(stderr,
1592 "Encode/decode mismatch on frame %d at"
1593 " Y[%d, %d] {%d/%d},"
1594 " U[%d, %d] {%d/%d},"
1595 " V[%d, %d] {%d/%d}\n",
1596 frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0],
1597 v[1], v[2], v[3]);
1598 mismatch = 1;
1599 }
1600
1601 aom_img_free(&enc_img);
1602 aom_img_free(&dec_img);
1603 return mismatch;
1604 }
1605 #endif // CONFIG_AV1_DECODER
1606
// Running PSNR totals for the encoded stream, accumulated from the encoder's
// PSNR packets and reported by show_psnr().
struct psnr_stats {
  // The second element of these arrays is reserved for high bitdepth.
  uint64_t psnr_sse_total[2];      // Sum of the per-packet SSE values.
  uint64_t psnr_samples_total[2];  // Sum of the per-packet sample counts.
  double psnr_totals[2][4];        // Per-entry sums of the four packet PSNRs.
  int psnr_count[2];               // Number of PSNR packets accumulated.
};
1614
1615static void show_psnr(struct psnr_stats *psnr_stream, double peak) {
1616 double ovpsnr;
1617
1618 if (!psnr_stream->psnr_count[0]) return;
1619
1620 fprintf(stderr, "\nPSNR (Overall/Avg/Y/U/V)");
1621 ovpsnr = sse_to_psnr((double)psnr_stream->psnr_samples_total[0], peak,
1622 (double)psnr_stream->psnr_sse_total[0]);
1623 fprintf(stderr, " %.3f", ovpsnr);
1624
1625 for (int i = 0; i < 4; i++) {
1626 fprintf(stderr, " %.3f",
1627 psnr_stream->psnr_totals[0][i] / psnr_stream->psnr_count[0]);
1628 }
1629 fprintf(stderr, "\n");
1630}
1631
1632static aom::AV1RateControlRtcConfig create_rtc_rc_config(
1633 const aom_codec_enc_cfg_t &cfg, const AppInput &app_input) {
1634 aom::AV1RateControlRtcConfig rc_cfg;
1635 rc_cfg.width = cfg.g_w;
1636 rc_cfg.height = cfg.g_h;
1637 rc_cfg.max_quantizer = cfg.rc_max_quantizer;
1638 rc_cfg.min_quantizer = cfg.rc_min_quantizer;
1639 rc_cfg.target_bandwidth = cfg.rc_target_bitrate;
1640 rc_cfg.buf_initial_sz = cfg.rc_buf_initial_sz;
1641 rc_cfg.buf_optimal_sz = cfg.rc_buf_optimal_sz;
1642 rc_cfg.buf_sz = cfg.rc_buf_sz;
1643 rc_cfg.overshoot_pct = cfg.rc_overshoot_pct;
1644 rc_cfg.undershoot_pct = cfg.rc_undershoot_pct;
1645 // This is hardcoded as AOME_SET_MAX_INTRA_BITRATE_PCT
1646 rc_cfg.max_intra_bitrate_pct = 300;
1647 rc_cfg.framerate = cfg.g_timebase.den;
1648 // TODO(jianj): Add suppor for SVC.
1649 rc_cfg.ss_number_layers = 1;
1650 rc_cfg.ts_number_layers = 1;
1651 rc_cfg.scaling_factor_num[0] = 1;
1652 rc_cfg.scaling_factor_den[0] = 1;
1653 rc_cfg.layer_target_bitrate[0] = static_cast<int>(rc_cfg.target_bandwidth);
1654 rc_cfg.max_quantizers[0] = rc_cfg.max_quantizer;
1655 rc_cfg.min_quantizers[0] = rc_cfg.min_quantizer;
1656 rc_cfg.aq_mode = app_input.aq_mode;
1657
1658 return rc_cfg;
1659}
1660
// Maps an internal qindex (0-255 range) back to the external quantizer
// (0-63 range).
//
// Returns the smallest quantizer whose table qindex is greater than or equal
// to `qindex`. Values above the largest table entry map to the last quantizer
// (63); values at or below zero map to quantizer 0.
static int qindex_to_quantizer(int qindex) {
  // Table that converts 0-63 range Q values passed in outside to the 0-255
  // range Qindex used internally.
  static const int quantizer_to_qindex[] = {
    0,   4,   8,   12,  16,  20,  24,  28,  32,  36,  40,  44,  48,
    52,  56,  60,  64,  68,  72,  76,  80,  84,  88,  92,  96,  100,
    104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
    156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204,
    208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255,
  };
  // Derive the bound from the table so the loop cannot drift out of sync with
  // the number of entries.
  const int num_quantizers =
      (int)(sizeof(quantizer_to_qindex) / sizeof(quantizer_to_qindex[0]));

  for (int quantizer = 0; quantizer < num_quantizers; ++quantizer) {
    if (quantizer_to_qindex[quantizer] >= qindex) return quantizer;
  }

  return num_quantizers - 1;
}
1676
1677static void set_active_map(const aom_codec_enc_cfg_t *cfg,
1678 aom_codec_ctx_t *codec, int frame_cnt) {
1679 aom_active_map_t map = { 0, 0, 0 };
1680
1681 map.rows = (cfg->g_h + 15) / 16;
1682 map.cols = (cfg->g_w + 15) / 16;
1683
1684 map.active_map = (uint8_t *)malloc(map.rows * map.cols);
1685 if (!map.active_map) die("Failed to allocate active map");
1686
1687 // Example map for testing.
1688 for (unsigned int i = 0; i < map.rows; ++i) {
1689 for (unsigned int j = 0; j < map.cols; ++j) {
1690 int index = map.cols * i + j;
1691 map.active_map[index] = 1;
1692 if (frame_cnt < 300) {
1693 if (i < map.rows / 2 && j < map.cols / 2) map.active_map[index] = 0;
1694 } else if (frame_cnt >= 300) {
1695 if (i < map.rows / 2 && j >= map.cols / 2) map.active_map[index] = 0;
1696 }
1697 }
1698 }
1699
1700 if (aom_codec_control(codec, AOME_SET_ACTIVEMAP, &map))
1701 die_codec(codec, "Failed to set active map");
1702
1703 free(map.active_map);
1704}
1705
1706int main(int argc, const char **argv) {
1707 AppInput app_input;
1708 AvxVideoWriter *outfile[AOM_MAX_LAYERS] = { NULL };
1709 FILE *obu_files[AOM_MAX_LAYERS] = { NULL };
1710 AvxVideoWriter *total_layer_file = NULL;
1711 FILE *total_layer_obu_file = NULL;
1713 int frame_cnt = 0;
1714 aom_image_t raw;
1715 int frame_avail;
1716 int got_data = 0;
1717 int flags = 0;
1718 int i;
1719 int pts = 0; // PTS starts at 0.
1720 int frame_duration = 1; // 1 timebase tick per frame.
1721 aom_svc_layer_id_t layer_id;
1722 aom_svc_params_t svc_params;
1723 aom_svc_ref_frame_config_t ref_frame_config;
1724 aom_svc_ref_frame_comp_pred_t ref_frame_comp_pred;
1725
1726#if CONFIG_INTERNAL_STATS
1727 FILE *stats_file = fopen("opsnr.stt", "a");
1728 if (stats_file == NULL) {
1729 die("Cannot open opsnr.stt\n");
1730 }
1731#endif
1732#if CONFIG_AV1_DECODER
1733 aom_codec_ctx_t decoder;
1734#endif
1735
1736 struct RateControlMetrics rc;
1737 int64_t cx_time = 0;
1738 int64_t cx_time_layer[AOM_MAX_LAYERS]; // max number of layers.
1739 int frame_cnt_layer[AOM_MAX_LAYERS];
1740 double sum_bitrate = 0.0;
1741 double sum_bitrate2 = 0.0;
1742 double framerate = 30.0;
1743 int use_svc_control = 1;
1744 int set_err_resil_frame = 0;
1745 int test_changing_bitrate = 0;
1746 zero(rc.layer_target_bitrate);
1747 memset(&layer_id, 0, sizeof(aom_svc_layer_id_t));
1748 memset(&app_input, 0, sizeof(AppInput));
1749 memset(&svc_params, 0, sizeof(svc_params));
1750
1751 // Flag to test dynamic scaling of source frames for single
1752 // spatial stream, using the scaling_mode control.
1753 const int test_dynamic_scaling_single_layer = 0;
1754
1755 // Flag to test setting speed per layer.
1756 const int test_speed_per_layer = 0;
1757
1758 // Flag for testing active maps.
1759 const int test_active_maps = 0;
1760
1761 /* Setup default input stream settings */
1762 for (i = 0; i < MAX_NUM_SPATIAL_LAYERS; ++i) {
1763 app_input.input_ctx[i].framerate.numerator = 30;
1764 app_input.input_ctx[i].framerate.denominator = 1;
1765 app_input.input_ctx[i].only_i420 = 0;
1766 app_input.input_ctx[i].bit_depth = AOM_BITS_8;
1767 }
1768 app_input.speed = 7;
1769 exec_name = argv[0];
1770
1771 // start with default encoder configuration
1774 if (res != AOM_CODEC_OK) {
1775 die("Failed to get config: %s\n", aom_codec_err_to_string(res));
1776 }
1777
1778 // Real time parameters.
1780
1781 cfg.rc_end_usage = AOM_CBR;
1782 cfg.rc_min_quantizer = 2;
1783 cfg.rc_max_quantizer = 52;
1784 cfg.rc_undershoot_pct = 50;
1785 cfg.rc_overshoot_pct = 50;
1786 cfg.rc_buf_initial_sz = 600;
1787 cfg.rc_buf_optimal_sz = 600;
1788 cfg.rc_buf_sz = 1000;
1789 cfg.rc_resize_mode = 0; // Set to RESIZE_DYNAMIC for dynamic resize.
1790 cfg.g_lag_in_frames = 0;
1791 cfg.kf_mode = AOM_KF_AUTO;
1792 cfg.g_w = 0; // Force user to specify width and height for raw input.
1793 cfg.g_h = 0;
1794
1795 parse_command_line(argc, argv, &app_input, &svc_params, &cfg);
1796
1797 int ts_number_layers = svc_params.number_temporal_layers;
1798 int ss_number_layers = svc_params.number_spatial_layers;
1799
1800 unsigned int width = cfg.g_w;
1801 unsigned int height = cfg.g_h;
1802
1803 if (app_input.layering_mode >= 0) {
1804 if (ts_number_layers !=
1805 mode_to_num_temporal_layers[app_input.layering_mode] ||
1806 ss_number_layers !=
1807 mode_to_num_spatial_layers[app_input.layering_mode]) {
1808 die("Number of layers doesn't match layering mode.");
1809 }
1810 }
1811
1812 bool has_non_y4m_input = false;
1813 for (i = 0; i < AOM_MAX_LAYERS; ++i) {
1814 if (app_input.input_ctx[i].file_type != FILE_TYPE_Y4M) {
1815 has_non_y4m_input = true;
1816 break;
1817 }
1818 }
1819 // Y4M reader has its own allocation.
1820 if (has_non_y4m_input) {
1821 if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, width, height, 32)) {
1822 die("Failed to allocate image (%dx%d)", width, height);
1823 }
1824 }
1825
1827
1828 memcpy(&rc.layer_target_bitrate[0], &svc_params.layer_target_bitrate[0],
1829 sizeof(svc_params.layer_target_bitrate));
1830
1831 unsigned int total_rate = 0;
1832 for (i = 0; i < ss_number_layers; i++) {
1833 total_rate +=
1834 svc_params
1835 .layer_target_bitrate[i * ts_number_layers + ts_number_layers - 1];
1836 }
1837 if (total_rate != cfg.rc_target_bitrate) {
1838 die("Incorrect total target bitrate, expected: %d", total_rate);
1839 }
1840
1841 svc_params.framerate_factor[0] = 1;
1842 if (ts_number_layers == 2) {
1843 svc_params.framerate_factor[0] = 2;
1844 svc_params.framerate_factor[1] = 1;
1845 } else if (ts_number_layers == 3) {
1846 svc_params.framerate_factor[0] = 4;
1847 svc_params.framerate_factor[1] = 2;
1848 svc_params.framerate_factor[2] = 1;
1849 }
1850
1851 libaom_examples::MultilayerMetadata multilayer_metadata;
1852 if (app_input.multilayer_metadata_file != NULL) {
1853 if (!libaom_examples::parse_multilayer_file(
1854 app_input.multilayer_metadata_file, &multilayer_metadata)) {
1855 die("Failed to parse multilayer metadata");
1856 }
1857 libaom_examples::print_multilayer_metadata(multilayer_metadata);
1858 }
1859
1860 framerate = cfg.g_timebase.den / cfg.g_timebase.num;
1861 set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers);
1862
1863 AvxVideoInfo info;
1864 info.codec_fourcc = get_fourcc_by_aom_encoder(encoder);
1865 info.frame_width = cfg.g_w;
1866 info.frame_height = cfg.g_h;
1867 info.time_base.numerator = cfg.g_timebase.num;
1868 info.time_base.denominator = cfg.g_timebase.den;
1869 // Open an output file for each stream.
1870 for (int sl = 0; sl < ss_number_layers; ++sl) {
1871 for (int tl = 0; tl < ts_number_layers; ++tl) {
1872 i = sl * ts_number_layers + tl;
1873 char file_name[PATH_MAX];
1874 snprintf(file_name, sizeof(file_name), "%s_%d.av1",
1875 app_input.output_filename, i);
1876 if (app_input.output_obu) {
1877 obu_files[i] = fopen(file_name, "wb");
1878 if (!obu_files[i]) die("Failed to open %s for writing", file_name);
1879 } else {
1880 outfile[i] = aom_video_writer_open(file_name, kContainerIVF, &info);
1881 if (!outfile[i]) die("Failed to open %s for writing", file_name);
1882 }
1883 }
1884 }
1885 if (app_input.output_obu) {
1886 total_layer_obu_file = fopen(app_input.output_filename, "wb");
1887 if (!total_layer_obu_file)
1888 die("Failed to open %s for writing", app_input.output_filename);
1889 } else {
1890 total_layer_file =
1891 aom_video_writer_open(app_input.output_filename, kContainerIVF, &info);
1892 if (!total_layer_file)
1893 die("Failed to open %s for writing", app_input.output_filename);
1894 }
1895
1896 // Initialize codec.
1897 aom_codec_ctx_t codec;
1898 aom_codec_flags_t flag = 0;
1900 flag |= app_input.show_psnr ? AOM_CODEC_USE_PSNR : 0;
1901 if (aom_codec_enc_init(&codec, encoder, &cfg, flag))
1902 die_codec(&codec, "Failed to initialize encoder");
1903
1904#if CONFIG_AV1_DECODER
1905 if (app_input.decode) {
1906 if (aom_codec_dec_init(&decoder, get_aom_decoder_by_index(0), NULL, 0))
1907 die_codec(&decoder, "Failed to initialize decoder");
1908 }
1909#endif
1910
1911 aom_codec_control(&codec, AOME_SET_CPUUSED, app_input.speed);
1912 aom_codec_control(&codec, AV1E_SET_AQ_MODE, app_input.aq_mode ? 3 : 0);
1927
1928 // Settings to reduce key frame encoding time.
1934
1936
1937 aom_codec_control(&codec, AV1E_SET_TUNE_CONTENT, app_input.tune_content);
1938 if (app_input.tune_content == AOM_CONTENT_SCREEN) {
1940 // INTRABC is currently disabled for rt mode, as it's too slow.
1942 }
1943
1944 if (app_input.use_external_rc) {
1946 }
1947
1949
1952
1954
1955 svc_params.number_spatial_layers = ss_number_layers;
1956 svc_params.number_temporal_layers = ts_number_layers;
1957 for (i = 0; i < ss_number_layers * ts_number_layers; ++i) {
1958 svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
1959 svc_params.min_quantizers[i] = cfg.rc_min_quantizer;
1960 }
1961 if (!app_input.scale_factors_explicitly_set) {
1962 for (i = 0; i < ss_number_layers; ++i) {
1963 svc_params.scaling_factor_num[i] = 1;
1964 svc_params.scaling_factor_den[i] = 1;
1965 }
1966 if (ss_number_layers == 2) {
1967 svc_params.scaling_factor_num[0] = 1;
1968 svc_params.scaling_factor_den[0] = 2;
1969 } else if (ss_number_layers == 3) {
1970 svc_params.scaling_factor_num[0] = 1;
1971 svc_params.scaling_factor_den[0] = 4;
1972 svc_params.scaling_factor_num[1] = 1;
1973 svc_params.scaling_factor_den[1] = 2;
1974 }
1975 }
1976 aom_codec_control(&codec, AV1E_SET_SVC_PARAMS, &svc_params);
1977 // TODO(aomedia:3032): Configure KSVC in fixed mode.
1978
1979 // This controls the maximum target size of the key frame.
1980 // For generating smaller key frames, use a smaller max_intra_size_pct
1981 // value, like 100 or 200.
1982 {
1983 const int max_intra_size_pct = 300;
1985 max_intra_size_pct);
1986 }
1987
1988 for (int lx = 0; lx < ts_number_layers * ss_number_layers; lx++) {
1989 cx_time_layer[lx] = 0;
1990 frame_cnt_layer[lx] = 0;
1991 }
1992
1993 std::unique_ptr<aom::AV1RateControlRTC> rc_api;
1994 if (app_input.use_external_rc) {
1995 const aom::AV1RateControlRtcConfig rc_cfg =
1996 create_rtc_rc_config(cfg, app_input);
1997 rc_api = aom::AV1RateControlRTC::Create(rc_cfg);
1998 }
1999
2000 frame_avail = 1;
2001 struct psnr_stats psnr_stream;
2002 memset(&psnr_stream, 0, sizeof(psnr_stream));
2003 while (frame_avail || got_data) {
2004 struct aom_usec_timer timer;
2005 frame_avail = read_frame(&(app_input.input_ctx[0]), &raw);
2006 // Loop over spatial layers.
2007 for (int slx = 0; slx < ss_number_layers; slx++) {
2008 if (slx > 0 && app_input.input_ctx[slx].filename != NULL) {
2009 const int previous_layer_frame_avail = frame_avail;
2010 frame_avail = read_frame(&(app_input.input_ctx[slx]), &raw);
2011 if (previous_layer_frame_avail != frame_avail) {
2012 die("Mismatch in number of frames between spatial layer input files");
2013 }
2014 }
2015
2016 aom_codec_iter_t iter = NULL;
2017 const aom_codec_cx_pkt_t *pkt;
2018 int layer = 0;
2019 // Flag for superframe whose base is key.
2020 int is_key_frame = (frame_cnt % cfg.kf_max_dist) == 0;
2021 // For flexible mode:
2022 if (app_input.layering_mode >= 0) {
2023 // Set the reference/update flags, layer_id, and reference_map
2024 // buffer index.
2025 set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id,
2026 &ref_frame_config, &ref_frame_comp_pred,
2027 &use_svc_control, slx, is_key_frame,
2028 (app_input.layering_mode == 10), app_input.speed);
2029 aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
2030 if (use_svc_control) {
2032 &ref_frame_config);
2034 &ref_frame_comp_pred);
2035 }
2036 if (app_input.multilayer_metadata_file != NULL) {
2037 add_multilayer_metadata(&raw, multilayer_metadata);
2038 }
2039 // Set the speed per layer.
2040 if (test_speed_per_layer) {
2041 int speed_per_layer = 10;
2042 if (layer_id.spatial_layer_id == 0) {
2043 if (layer_id.temporal_layer_id == 0) speed_per_layer = 6;
2044 if (layer_id.temporal_layer_id == 1) speed_per_layer = 7;
2045 if (layer_id.temporal_layer_id == 2) speed_per_layer = 8;
2046 } else if (layer_id.spatial_layer_id == 1) {
2047 if (layer_id.temporal_layer_id == 0) speed_per_layer = 7;
2048 if (layer_id.temporal_layer_id == 1) speed_per_layer = 8;
2049 if (layer_id.temporal_layer_id == 2) speed_per_layer = 9;
2050 } else if (layer_id.spatial_layer_id == 2) {
2051 if (layer_id.temporal_layer_id == 0) speed_per_layer = 8;
2052 if (layer_id.temporal_layer_id == 1) speed_per_layer = 9;
2053 if (layer_id.temporal_layer_id == 2) speed_per_layer = 10;
2054 }
2055 aom_codec_control(&codec, AOME_SET_CPUUSED, speed_per_layer);
2056 }
2057 } else {
2058 // Only up to 3 temporal layers supported in fixed mode.
2059 // Only need to set spatial and temporal layer_id: reference
2060 // prediction, refresh, and buffer_idx are set internally.
2061 layer_id.spatial_layer_id = slx;
2062 layer_id.temporal_layer_id = 0;
2063 if (ts_number_layers == 2) {
2064 layer_id.temporal_layer_id = (frame_cnt % 2) != 0;
2065 } else if (ts_number_layers == 3) {
2066 if (frame_cnt % 2 != 0)
2067 layer_id.temporal_layer_id = 2;
2068 else if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0))
2069 layer_id.temporal_layer_id = 1;
2070 }
2071 aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
2072 }
2073
2074 if (set_err_resil_frame && cfg.g_error_resilient == 0) {
2075 // Set error_resilient per frame: off/0 for base layer and
2076 // on/1 for enhancement layer frames.
2077 // Note that this can only be done on the fly/per-frame/layer
2078 // if the config error_resilience is off/0. See the logic for updating
2079 // in set_encoder_config():
2080 // tool_cfg->error_resilient_mode =
2081 // cfg->g_error_resilient | extra_cfg->error_resilient_mode;
2082 const int err_resil_mode =
2083 layer_id.spatial_layer_id > 0 || layer_id.temporal_layer_id > 0;
2085 err_resil_mode);
2086 }
2087
2088 layer = slx * ts_number_layers + layer_id.temporal_layer_id;
2089 if (frame_avail && slx == 0) ++rc.layer_input_frames[layer];
2090
2091 if (test_dynamic_scaling_single_layer) {
2092 // Example to scale source down by 2x2, then 4x4, and then back up to
2093 // 2x2, and then back to original.
2094 int frame_2x2 = 200;
2095 int frame_4x4 = 400;
2096 int frame_2x2up = 600;
2097 int frame_orig = 800;
2098 if (frame_cnt >= frame_2x2 && frame_cnt < frame_4x4) {
2099 // Scale source down by 2x2.
2100 struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
2101 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2102 } else if (frame_cnt >= frame_4x4 && frame_cnt < frame_2x2up) {
2103 // Scale source down by 4x4.
2104 struct aom_scaling_mode mode = { AOME_ONEFOUR, AOME_ONEFOUR };
2105 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2106 } else if (frame_cnt >= frame_2x2up && frame_cnt < frame_orig) {
2107 // Source back up to 2x2.
2108 struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
2109 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2110 } else if (frame_cnt >= frame_orig) {
2111 // Source back up to original resolution (no scaling).
2112 struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
2113 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2114 }
2115 if (frame_cnt == frame_2x2 || frame_cnt == frame_4x4 ||
2116 frame_cnt == frame_2x2up || frame_cnt == frame_orig) {
2117 // For dynamic resize testing on single layer: refresh all references
2118 // on the resized frame: this is to avoid decode error:
2119 // if resize goes down by >= 4x4 then libaom decoder will throw an
2120 // error that some reference (even though not used) is beyond the
2121 // limit size (must be smaller than 4x4).
2122 for (i = 0; i < REF_FRAMES; i++) ref_frame_config.refresh[i] = 1;
2123 if (use_svc_control) {
2125 &ref_frame_config);
2127 &ref_frame_comp_pred);
2128 }
2129 }
2130 }
2131
2132 // Change target_bitrate every other frame.
2133 if (test_changing_bitrate && frame_cnt % 2 == 0) {
2134 if (frame_cnt < 500)
2135 cfg.rc_target_bitrate += 10;
2136 else
2137 cfg.rc_target_bitrate -= 10;
2138 // Do big increase and decrease.
2139 if (frame_cnt == 100) cfg.rc_target_bitrate <<= 1;
2140 if (frame_cnt == 600) cfg.rc_target_bitrate >>= 1;
2141 if (cfg.rc_target_bitrate < 100) cfg.rc_target_bitrate = 100;
2142 // Call change_config, or bypass with new control.
2143 // res = aom_codec_enc_config_set(&codec, &cfg);
2145 cfg.rc_target_bitrate))
2146 die_codec(&codec, "Failed to SET_BITRATE_ONE_PASS_CBR");
2147 }
2148
2149 if (rc_api) {
2150 aom::AV1FrameParamsRTC frame_params;
2151 // TODO(jianj): Add support for SVC.
2152 frame_params.spatial_layer_id = 0;
2153 frame_params.temporal_layer_id = 0;
2154 frame_params.frame_type =
2155 is_key_frame ? aom::kKeyFrame : aom::kInterFrame;
2156 rc_api->ComputeQP(frame_params);
2157 const int current_qp = rc_api->GetQP();
2159 qindex_to_quantizer(current_qp))) {
2160 die_codec(&codec, "Failed to SET_QUANTIZER_ONE_PASS");
2161 }
2162 }
2163
2164 if (test_active_maps) set_active_map(&cfg, &codec, frame_cnt);
2165
2166 // Do the layer encode.
2167 aom_usec_timer_start(&timer);
2168 if (aom_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags))
2169 die_codec(&codec, "Failed to encode frame");
2170 aom_usec_timer_mark(&timer);
2171 cx_time += aom_usec_timer_elapsed(&timer);
2172 cx_time_layer[layer] += aom_usec_timer_elapsed(&timer);
2173 frame_cnt_layer[layer] += 1;
2174
2175 // Get the high motion content flag.
2176 int content_flag = 0;
2178 &content_flag)) {
2179 die_codec(&codec, "Failed to GET_HIGH_MOTION_CONTENT_SCREEN_RTC");
2180 }
2181
2182 got_data = 0;
2183 // For simulcast (mode 11): write out each spatial layer to the file.
2184 int ss_layers_write = (app_input.layering_mode == 11)
2185 ? layer_id.spatial_layer_id + 1
2186 : ss_number_layers;
2187 while ((pkt = aom_codec_get_cx_data(&codec, &iter))) {
2188 switch (pkt->kind) {
2190 for (int sl = layer_id.spatial_layer_id; sl < ss_layers_write;
2191 ++sl) {
2192 for (int tl = layer_id.temporal_layer_id; tl < ts_number_layers;
2193 ++tl) {
2194 int j = sl * ts_number_layers + tl;
2195 if (app_input.output_obu) {
2196 fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
2197 obu_files[j]);
2198 } else {
2199 aom_video_writer_write_frame(
2200 outfile[j],
2201 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2202 pkt->data.frame.sz, pts);
2203 }
2204 if (sl == layer_id.spatial_layer_id)
2205 rc.layer_encoding_bitrate[j] += 8.0 * pkt->data.frame.sz;
2206 }
2207 }
2208 got_data = 1;
2209 // Write everything into the top layer.
2210 if (app_input.output_obu) {
2211 fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
2212 total_layer_obu_file);
2213 } else {
2214 aom_video_writer_write_frame(
2215 total_layer_file,
2216 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2217 pkt->data.frame.sz, pts);
2218 }
2219 // Keep count of rate control stats per layer (for non-key).
2220 if (!(pkt->data.frame.flags & AOM_FRAME_IS_KEY)) {
2221 int j = layer_id.spatial_layer_id * ts_number_layers +
2222 layer_id.temporal_layer_id;
2223 assert(j >= 0);
2224 rc.layer_avg_frame_size[j] += 8.0 * pkt->data.frame.sz;
2225 rc.layer_avg_rate_mismatch[j] +=
2226 fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[j]) /
2227 rc.layer_pfb[j];
2228 if (slx == 0) ++rc.layer_enc_frames[layer_id.temporal_layer_id];
2229 }
2230
2231 if (rc_api) {
2232 rc_api->PostEncodeUpdate(pkt->data.frame.sz);
2233 }
2234 // Update for short-time encoding bitrate states, for moving window
2235 // of size rc->window, shifted by rc->window / 2.
2236 // Ignore first window segment, due to key frame.
2237 // For spatial layers: only do this for top/highest SL.
2238 if (frame_cnt > rc.window_size && slx == ss_number_layers - 1) {
2239 sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
2240 rc.window_size = (rc.window_size <= 0) ? 1 : rc.window_size;
2241 if (frame_cnt % rc.window_size == 0) {
2242 rc.window_count += 1;
2243 rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
2244 rc.variance_st_encoding_bitrate +=
2245 (sum_bitrate / rc.window_size) *
2246 (sum_bitrate / rc.window_size);
2247 sum_bitrate = 0.0;
2248 }
2249 }
2250 // Second shifted window.
2251 if (frame_cnt > rc.window_size + rc.window_size / 2 &&
2252 slx == ss_number_layers - 1) {
2253 sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
2254 if (frame_cnt > 2 * rc.window_size &&
2255 frame_cnt % rc.window_size == 0) {
2256 rc.window_count += 1;
2257 rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
2258 rc.variance_st_encoding_bitrate +=
2259 (sum_bitrate2 / rc.window_size) *
2260 (sum_bitrate2 / rc.window_size);
2261 sum_bitrate2 = 0.0;
2262 }
2263 }
2264
2265#if CONFIG_AV1_DECODER
2266 if (app_input.decode) {
2267 if (aom_codec_decode(
2268 &decoder,
2269 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2270 pkt->data.frame.sz, NULL))
2271 die_codec(&decoder, "Failed to decode frame");
2272 }
2273#endif
2274
2275 break;
2276 case AOM_CODEC_PSNR_PKT:
2277 if (app_input.show_psnr) {
2278 psnr_stream.psnr_sse_total[0] += pkt->data.psnr.sse[0];
2279 psnr_stream.psnr_samples_total[0] += pkt->data.psnr.samples[0];
2280 for (int plane = 0; plane < 4; plane++) {
2281 psnr_stream.psnr_totals[0][plane] += pkt->data.psnr.psnr[plane];
2282 }
2283 psnr_stream.psnr_count[0]++;
2284 }
2285 break;
2286 default: break;
2287 }
2288 }
2289#if CONFIG_AV1_DECODER
2290 if (got_data && app_input.decode) {
2291 // Don't look for mismatch on top spatial and top temporal layers as
2292 // they are non reference frames.
2293 if ((ss_number_layers > 1 || ts_number_layers > 1) &&
2294 !(layer_id.temporal_layer_id > 0 &&
2295 layer_id.temporal_layer_id == ts_number_layers - 1)) {
2296 if (test_decode(&codec, &decoder, frame_cnt)) {
2297#if CONFIG_INTERNAL_STATS
2298 fprintf(stats_file, "First mismatch occurred in frame %d\n",
2299 frame_cnt);
2300 fclose(stats_file);
2301#endif
2302 fatal("Mismatch seen");
2303 }
2304 }
2305 }
2306#endif
2307 } // loop over spatial layers
2308 ++frame_cnt;
2309 pts += frame_duration;
2310 }
2311
2312 for (i = 0; i < MAX_NUM_SPATIAL_LAYERS; ++i) {
2313 if (app_input.input_ctx[i].filename == NULL) {
2314 break;
2315 }
2316 close_input_file(&(app_input.input_ctx[i]));
2317 }
2318 printout_rate_control_summary(&rc, frame_cnt, ss_number_layers,
2319 ts_number_layers);
2320
2321 printf("\n");
2322 for (int slx = 0; slx < ss_number_layers; slx++)
2323 for (int tlx = 0; tlx < ts_number_layers; tlx++) {
2324 int lx = slx * ts_number_layers + tlx;
2325 printf("Per layer encoding time/FPS stats for encoder: %d %d %d %f %f \n",
2326 slx, tlx, frame_cnt_layer[lx],
2327 (float)cx_time_layer[lx] / (double)(frame_cnt_layer[lx] * 1000),
2328 1000000 * (double)frame_cnt_layer[lx] / (double)cx_time_layer[lx]);
2329 }
2330
2331 printf("\n");
2332 printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n",
2333 frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
2334 1000000 * (double)frame_cnt / (double)cx_time);
2335
2336 if (app_input.show_psnr) {
2337 show_psnr(&psnr_stream, 255.0);
2338 }
2339
2340 if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy encoder");
2341
2342#if CONFIG_AV1_DECODER
2343 if (app_input.decode) {
2344 if (aom_codec_destroy(&decoder))
2345 die_codec(&decoder, "Failed to destroy decoder");
2346 }
2347#endif
2348
2349#if CONFIG_INTERNAL_STATS
2350 fprintf(stats_file, "No mismatch detected in recon buffers\n");
2351 fclose(stats_file);
2352#endif
2353
2354 // Try to rewrite the output file headers with the actual frame count.
2355 for (i = 0; i < ss_number_layers * ts_number_layers; ++i)
2356 aom_video_writer_close(outfile[i]);
2357 aom_video_writer_close(total_layer_file);
2358
2359 if (has_non_y4m_input) {
2360 aom_img_free(&raw);
2361 }
2362 return EXIT_SUCCESS;
2363}
Describes the decoder algorithm interface to applications.
Describes the encoder algorithm interface to applications.
Describes the aom image descriptor and associated operations.
@ AOM_MIF_KEY_FRAME
Definition aom_image.h:166
@ AOM_CSP_UNKNOWN
Definition aom_image.h:143
enum aom_chroma_sample_position aom_chroma_sample_position_t
List of chroma sample positions.
#define AOM_IMG_FMT_HIGHBITDEPTH
Definition aom_image.h:38
aom_image_t * aom_img_alloc(aom_image_t *img, aom_img_fmt_t fmt, unsigned int d_w, unsigned int d_h, unsigned int align)
Open a descriptor, allocating storage for the underlying image.
@ AOM_IMG_FMT_I420
Definition aom_image.h:45
enum aom_img_fmt aom_img_fmt_t
List of supported image formats.
int aom_img_add_metadata(aom_image_t *img, uint32_t type, const uint8_t *data, size_t sz, aom_metadata_insert_flags_t insert_flag)
Add metadata to image.
void aom_img_free(aom_image_t *img)
Close an image descriptor.
Provides definitions for using AOM or AV1 encoder algorithm within the aom Codec Interface.
#define AOM_MAX_LAYERS
Definition aomcx.h:1732
#define AOM_MAX_TS_LAYERS
Definition aomcx.h:1734
aom_codec_iface_t * aom_codec_av1_cx(void)
The interface to the AV1 encoder.
@ AOM_FULL_SUPERFRAME_DROP
Definition aomcx.h:1794
@ AV1E_SET_BITRATE_ONE_PASS_CBR
Codec control to set the target bitrate in kilobits per second, unsigned int parameter....
Definition aomcx.h:1536
@ AV1E_SET_ENABLE_SMOOTH_INTRA
Codec control function to turn on / off smooth intra modes usage, int parameter.
Definition aomcx.h:1077
@ AV1E_SET_ENABLE_TPL_MODEL
Codec control function to enable RDO modulated by frame temporal dependency, unsigned int parameter.
Definition aomcx.h:414
@ AV1E_SET_AQ_MODE
Codec control function to set adaptive quantization mode, unsigned int parameter.
Definition aomcx.h:474
@ AV1E_SET_SVC_LAYER_ID
Codec control function to set the layer id, aom_svc_layer_id_t* parameter.
Definition aomcx.h:1285
@ AV1E_SET_SVC_REF_FRAME_CONFIG
Codec control function to set the reference frame config, aom_svc_ref_frame_config_t* parameter.
Definition aomcx.h:1295
@ AV1E_SET_TUNE_CONTENT
Codec control function to set content type, aom_tune_content parameter.
Definition aomcx.h:503
@ AV1E_SET_CDF_UPDATE_MODE
Codec control function to set CDF update mode, unsigned int parameter.
Definition aomcx.h:512
@ AV1E_SET_ENABLE_ANGLE_DELTA
Codec control function to turn on/off intra angle delta, int parameter.
Definition aomcx.h:1124
@ AV1E_SET_MV_COST_UPD_FREQ
Control to set frequency of the cost updates for motion vectors, unsigned int parameter.
Definition aomcx.h:1263
@ AV1E_SET_INTRA_DEFAULT_TX_ONLY
Control to use default tx type only for intra modes, int parameter.
Definition aomcx.h:1212
@ AV1E_SET_SVC_REF_FRAME_COMP_PRED
Codec control function to set reference frame compound prediction. aom_svc_ref_frame_comp_pred_t* par...
Definition aomcx.h:1400
@ AV1E_SET_ENABLE_INTRABC
Codec control function to turn on/off intra block copy mode, int parameter.
Definition aomcx.h:1120
@ AV1E_SET_ENABLE_WARPED_MOTION
Codec control function to turn on / off warped motion usage at sequence level, int parameter.
Definition aomcx.h:1045
@ AV1E_SET_RTC_EXTERNAL_RC
Codec control function to set flag for rate control used by external encoders.
Definition aomcx.h:1435
@ AV1E_SET_COEFF_COST_UPD_FREQ
Control to set frequency of the cost updates for coefficients, unsigned int parameter.
Definition aomcx.h:1243
@ AV1E_SET_ENABLE_CDEF
Codec control function to encode with CDEF, unsigned int parameter.
Definition aomcx.h:677
@ AOME_SET_ACTIVEMAP
Codec control function to pass an Active map to encoder, aom_active_map_t* parameter.
Definition aomcx.h:190
@ AV1E_SET_DV_COST_UPD_FREQ
Control to set frequency of the cost updates for intrabc motion vectors, unsigned int parameter.
Definition aomcx.h:1366
@ AV1E_SET_SVC_FRAME_DROP_MODE
Codec control to set the frame drop mode for SVC, unsigned int parameter. The valid values are consta...
Definition aomcx.h:1549
@ AV1E_SET_SVC_PARAMS
Codec control function to set SVC parameters, aom_svc_params_t* parameter.
Definition aomcx.h:1290
@ AV1E_SET_ENABLE_FILTER_INTRA
Codec control function to turn on / off filter intra usage at sequence level, int parameter.
Definition aomcx.h:1066
@ AV1E_SET_ENABLE_PALETTE
Codec control function to turn on/off palette mode, int parameter.
Definition aomcx.h:1116
@ AV1E_SET_ENABLE_CFL_INTRA
Codec control function to turn on / off CFL uv intra mode usage, int parameter.
Definition aomcx.h:1095
@ AOME_SET_MAX_INTRA_BITRATE_PCT
Codec control function to set max data rate for intra frames, unsigned int parameter.
Definition aomcx.h:312
@ AV1E_SET_ERROR_RESILIENT_MODE
Codec control function to enable error_resilient_mode, int parameter.
Definition aomcx.h:448
@ AV1E_SET_ENABLE_OBMC
Codec control function to predict with OBMC mode, unsigned int parameter.
Definition aomcx.h:704
@ AV1E_SET_AUTO_TILES
Codec control to set auto tiling, unsigned int parameter. Value of 1 means encoder will set number of...
Definition aomcx.h:1557
@ AV1E_SET_LOOPFILTER_CONTROL
Codec control to control loop filter.
Definition aomcx.h:1415
@ AOME_SET_SCALEMODE
Codec control function to set encoder scaling mode for the next frame to be coded,...
Definition aomcx.h:197
@ AV1E_SET_ENABLE_ORDER_HINT
Codec control function to turn on / off frame order hint (int parameter). Affects: joint compound mod...
Definition aomcx.h:872
@ AV1E_SET_DELTAQ_MODE
Codec control function to set the delta q mode, unsigned int parameter.
Definition aomcx.h:1140
@ AV1E_SET_POSTENCODE_DROP_RTC
Codec control to enable post encode frame drop for RTC encoding, int parameter.
Definition aomcx.h:1573
@ AV1E_SET_ENABLE_GLOBAL_MOTION
Codec control function to turn on / off global motion usage for a sequence, int parameter.
Definition aomcx.h:1035
@ AOME_SET_CPUUSED
Codec control function to set encoder internal speed settings, int parameter.
Definition aomcx.h:220
@ AV1E_GET_HIGH_MOTION_CONTENT_SCREEN_RTC
Codec control to get the high motion content flag, used for screen content realtime (RTC) encoding,...
Definition aomcx.h:1564
@ AV1E_SET_GF_CBR_BOOST_PCT
Boost percentage for Golden Frame in CBR mode, unsigned int parameter.
Definition aomcx.h:345
@ AV1E_SET_QUANTIZER_ONE_PASS
Codec control to set quantizer for the next frame, int parameter.
Definition aomcx.h:1498
@ AV1E_SET_MODE_COST_UPD_FREQ
Control to set frequency of the cost updates for mode, unsigned int parameter.
Definition aomcx.h:1253
@ AV1E_SET_MAX_CONSEC_FRAME_DROP_MS_CBR
Codec control to set the maximum number of consecutive frame drops, in units of time (milliseconds),...
Definition aomcx.h:1579
@ AV1_GET_NEW_FRAME_IMAGE
Codec control function to get a pointer to the new frame.
Definition aom.h:70
const char * aom_codec_iface_name(aom_codec_iface_t *iface)
Return the name for a given interface.
enum aom_bit_depth aom_bit_depth_t
Bit depth for codec. This enumeration determines the bit depth of the codec.
aom_codec_err_t aom_codec_control(aom_codec_ctx_t *ctx, int ctrl_id,...)
Algorithm Control.
long aom_codec_flags_t
Initialization-time Feature Enabling.
Definition aom_codec.h:232
const struct aom_codec_iface aom_codec_iface_t
Codec interface structure.
Definition aom_codec.h:271
aom_codec_err_t aom_codec_destroy(aom_codec_ctx_t *ctx)
Destroy a codec instance.
const char * aom_codec_err_to_string(aom_codec_err_t err)
Convert error number to printable string.
aom_codec_err_t
Algorithm return codes.
Definition aom_codec.h:155
#define AOM_CODEC_CONTROL_TYPECHECKED(ctx, id, data)
aom_codec_control wrapper macro (adds type-checking, less flexible)
Definition aom_codec.h:542
const void * aom_codec_iter_t
Iterator.
Definition aom_codec.h:305
#define AOM_FRAME_IS_KEY
Definition aom_codec.h:288
@ AOM_BITS_8
Definition aom_codec.h:336
@ AOM_BITS_10
Definition aom_codec.h:337
@ AOM_CODEC_INVALID_PARAM
An application-supplied parameter is not valid.
Definition aom_codec.h:200
@ AOM_CODEC_MEM_ERROR
Memory operation failed.
Definition aom_codec.h:163
@ AOM_CODEC_OK
Operation completed without error.
Definition aom_codec.h:157
aom_codec_err_t aom_codec_decode(aom_codec_ctx_t *ctx, const uint8_t *data, size_t data_sz, void *user_priv)
Decode data.
#define aom_codec_dec_init(ctx, iface, cfg, flags)
Convenience macro for aom_codec_dec_init_ver()
Definition aom_decoder.h:129
const aom_codec_cx_pkt_t * aom_codec_get_cx_data(aom_codec_ctx_t *ctx, aom_codec_iter_t *iter)
Encoded data iterator.
aom_codec_err_t aom_codec_encode(aom_codec_ctx_t *ctx, const aom_image_t *img, aom_codec_pts_t pts, unsigned long duration, aom_enc_frame_flags_t flags)
Encode a frame.
#define aom_codec_enc_init(ctx, iface, cfg, flags)
Convenience macro for aom_codec_enc_init_ver()
Definition aom_encoder.h:943
aom_codec_err_t aom_codec_enc_config_default(aom_codec_iface_t *iface, aom_codec_enc_cfg_t *cfg, unsigned int usage)
Get the default configuration for a usage.
#define AOM_USAGE_REALTIME
usage parameter analogous to AV1 REALTIME mode.
Definition aom_encoder.h:1016
#define AOM_CODEC_USE_HIGHBITDEPTH
Definition aom_encoder.h:80
#define AOM_CODEC_USE_PSNR
Initialization-time Feature Enabling.
Definition aom_encoder.h:79
@ AOM_CBR
Definition aom_encoder.h:187
@ AOM_KF_AUTO
Definition aom_encoder.h:202
@ AOM_CODEC_PSNR_PKT
Definition aom_encoder.h:113
@ AOM_CODEC_CX_FRAME_PKT
Definition aom_encoder.h:110
aom active region map
Definition aomcx.h:1640
unsigned int rows
Definition aomcx.h:1643
unsigned int cols
Definition aomcx.h:1644
unsigned char * active_map
Specifies on (1) or off (0) for each 16x16 region within a frame.
Definition aomcx.h:1642
Codec context structure.
Definition aom_codec.h:315
Encoder output packet.
Definition aom_encoder.h:122
size_t sz
Definition aom_encoder.h:127
enum aom_codec_cx_pkt_kind kind
Definition aom_encoder.h:123
double psnr[4]
Definition aom_encoder.h:145
union aom_codec_cx_pkt::@1 data
struct aom_codec_cx_pkt::@1::@2 frame
aom_codec_frame_flags_t flags
Definition aom_encoder.h:132
void * buf
Definition aom_encoder.h:126
Encoder configuration structure.
Definition aom_encoder.h:387
unsigned int g_input_bit_depth
Bit-depth of the input frames.
Definition aom_encoder.h:475
unsigned int rc_dropframe_thresh
Temporal resampling configuration, if supported by the codec.
Definition aom_encoder.h:540
struct aom_rational g_timebase
Stream timebase units.
Definition aom_encoder.h:489
unsigned int g_usage
Algorithm specific "usage" value.
Definition aom_encoder.h:399
unsigned int rc_buf_sz
Decoder Buffer Size.
Definition aom_encoder.h:705
unsigned int g_h
Height of the frame.
Definition aom_encoder.h:435
enum aom_kf_mode kf_mode
Keyframe placement mode.
Definition aom_encoder.h:768
enum aom_rc_mode rc_end_usage
Rate control algorithm to use.
Definition aom_encoder.h:623
unsigned int g_threads
Maximum number of threads to use.
Definition aom_encoder.h:407
unsigned int kf_min_dist
Keyframe minimum interval.
Definition aom_encoder.h:777
unsigned int g_lag_in_frames
Allow lagged encoding.
Definition aom_encoder.h:518
unsigned int rc_buf_initial_sz
Decoder Buffer Initial Size.
Definition aom_encoder.h:714
unsigned int g_profile
Bitstream profile to use.
Definition aom_encoder.h:417
aom_bit_depth_t g_bit_depth
Bit-depth of the codec.
Definition aom_encoder.h:467
unsigned int g_w
Width of the frame.
Definition aom_encoder.h:426
unsigned int rc_undershoot_pct
Rate control adaptation undershoot control.
Definition aom_encoder.h:681
unsigned int kf_max_dist
Keyframe maximum interval.
Definition aom_encoder.h:786
aom_codec_er_flags_t g_error_resilient
Enable error resilient modes.
Definition aom_encoder.h:497
unsigned int rc_max_quantizer
Maximum (Worst Quality) Quantizer.
Definition aom_encoder.h:668
unsigned int rc_buf_optimal_sz
Decoder Buffer Optimal Size.
Definition aom_encoder.h:723
unsigned int rc_min_quantizer
Minimum (Best Quality) Quantizer.
Definition aom_encoder.h:658
unsigned int rc_target_bitrate
Target data rate.
Definition aom_encoder.h:644
unsigned int rc_resize_mode
Mode for spatial resampling, if supported by the codec.
Definition aom_encoder.h:549
unsigned int rc_overshoot_pct
Rate control adaptation overshoot control.
Definition aom_encoder.h:690
Image Descriptor.
Definition aom_image.h:182
aom_img_fmt_t fmt
Definition aom_image.h:183
unsigned int d_w
Definition aom_image.h:197
unsigned int d_h
Definition aom_image.h:198
int num
Definition aom_encoder.h:165
int den
Definition aom_encoder.h:166
aom image scaling mode
Definition aomcx.h:1652
Definition aomcx.h:1737
int temporal_layer_id
Definition aomcx.h:1739
int spatial_layer_id
Definition aomcx.h:1738
Definition aomcx.h:1748
int max_quantizers[32]
Definition aomcx.h:1751
int number_spatial_layers
Definition aomcx.h:1749
int layer_target_bitrate[32]
Definition aomcx.h:1756
int framerate_factor[8]
Definition aomcx.h:1758
int min_quantizers[32]
Definition aomcx.h:1752
int scaling_factor_den[4]
Definition aomcx.h:1754
int number_temporal_layers
Definition aomcx.h:1750
int scaling_factor_num[4]
Definition aomcx.h:1753
Definition aomcx.h:1785
int use_comp_pred[3]
Definition aomcx.h:1788
Definition aomcx.h:1762
int reference[7]
Definition aomcx.h:1778
int refresh[8]
Definition aomcx.h:1781
int ref_idx[7]
Definition aomcx.h:1780